Skip to content

Commit

Permalink
[API] Add image gen v4 preview and remove tests for image gen v1
Browse files Browse the repository at this point in the history
  • Loading branch information
Aedial committed Jan 7, 2025
1 parent 15b581f commit 987cb51
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 20 deletions.
44 changes: 44 additions & 0 deletions example/generate_image_v4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
{filename}
==============================================================================
| Example of how to generate an image
|
| The resulting images will be placed in a folder named "results"
"""

import asyncio
from pathlib import Path

from example.boilerplate import API
from novelai_api.ImagePreset import ImageModel, ImagePreset, UCPreset


async def main():
    """
    Generate one image with the v4 preview model and save it as "results/image_v4.png".

    Two character prompts are passed through ``preset.characters`` to show how
    multi-character prompting is configured.
    """

    output_dir = Path("results")
    output_dir.mkdir(exist_ok=True)

    async with API() as api_handler:
        api = api_handler.api

        model = ImageModel.Anime_v4_preview
        preset = ImagePreset.from_default_config(model)
        preset.seed = 42
        preset.uc_preset = UCPreset.Preset_Heavy
        # the quality tags are written by hand in the prompt below instead
        preset.quality_toggle = False

        # even though we give positions, the model can ignore them
        preset.characters = [
            # keys: "prompt" (required), "uc" and "position" (both optional)
            {"prompt": "1girl", "position": "A3"},
            {"prompt": "1boy"},  # default position is "C3"
        ]

        # "1girl, 1boy" + quality tags without "rating:general"
        prompt = "1girl, 1boy, best quality, very aesthetic, absurdres"
        async for _, img in api.high_level.generate_image(prompt, model, preset):
            # plain string literal: the file name has nothing to interpolate
            (output_dir / "image_v4.png").write_bytes(img)


# run the example only when this file is executed as a script (not when imported)
if __name__ == "__main__":
    asyncio.run(main())
32 changes: 22 additions & 10 deletions example/generate_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,33 @@ async def main():
# model = Model.Sigurd
# model = Model.Euterpe
# model = Model.Krake
model = Model.Clio
# model = Model.Clio
# model = Model.Kayra
model = Model.Erato

# NOTE: plain text prompt
prompt = PREAMBLE[model]
# NOTE: preamble should be the start. Look at the PREAMBLE variable in Preset.py for the correct preamble
# prompt = PREAMBLE[model] + "Suddenly,"
# NOTE: prompt encoded in tokens
# prompt = Tokenizer.encode(model, PREAMBLE[model])

# NOTE: empty preset
preset = Preset("preset", model, {})
# NOTE: instantiation from default (presets/presets_6B_v4/default.txt)
# preset = Preset.from_default(model)
# NOTE: instantiation from official file (presets/presets_6B_v4)
# NOTE: instantiation from official file (in presets/ folder)
# preset = Preset.from_official(model, "Storywriter")
# NOTE: instantiation from file
# NOTE: instantiation from file (note that each preset is for a specific model)
# preset = Preset.from_file("novelai_api/presets/presets_6B_v4/Storywriter.txt")
# NOTE: instantiation of a new preset
# preset = Preset("new preset", model)

# NOTE: modification of the preset
preset.min_length = 1
preset.max_length = 20
# NOTE: context size is allowed_max_tokens - output_length - 20 (if generate_until_sentence is True)
# e.g. 8192 - 50 - 20 = 8122
preset.max_length = 50

# NOTE: instantiate with arguments
global_settings = GlobalSettings(num_logprobs=GlobalSettings.NO_LOGPROBS)
Expand All @@ -60,7 +67,7 @@ async def main():
# bad_words = BanList()
# NOTE: ban list with elements in it
# bad_words = BanList(" cat", " dog", " boy")
# NOTE: disabled ban list with elements in it
# NOTE: disabled ban list with elements in it (if you want to control it with a condition)
# bad_words = BanList(" cat", " dog", " boy", enabled = False)
# NOTE: add elements to the bias list
if bad_words is not None:
Expand Down Expand Up @@ -97,6 +104,11 @@ async def main():
# NOTE: stop sequence as tokens
# stop_sequence = Tokenizer.encode(model, ["The End", "THE END", "\n"])

# NOTE: for all models except Erato
bytes_per_token = 2
# NOTE: for Erato (because of Llama 3)
# bytes_per_token = 4

# NOTE: normal generation
gen = await api.high_level.generate(
prompt,
Expand All @@ -111,9 +123,9 @@ async def main():
# NOTE: b64-encoded list of tokens ids
logger.info(gen["output"])
# NOTE: list of token ids
logger.info(b64_to_tokens(gen["output"]))
logger.info(b64_to_tokens(gen["output"], bytes_per_token))
# NOTE: decoded response
logger.info(Tokenizer.decode(model, b64_to_tokens(gen["output"])))
logger.info(Tokenizer.decode(model, b64_to_tokens(gen["output"], bytes_per_token)))

# NOTE: streamed generation
async for token in api.high_level.generate_stream(
Expand All @@ -131,9 +143,9 @@ async def main():
# NOTE: b64-encoded token id
token["token"],
# NOTE: token id
b64_to_tokens(token["token"]),
# NOTE: decoded token
Tokenizer.decode(model, b64_to_tokens(token["token"])),
b64_to_tokens(token["token"], bytes_per_token),
# NOTE: decoded token (do note that decoding single tokens can yield broken unicode characters)
Tokenizer.decode(model, b64_to_tokens(token["token"], bytes_per_token)),
)

# ... and more examples can be found in tests/test_generate.py
Expand Down
100 changes: 98 additions & 2 deletions novelai_api/ImagePreset.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class ImageModel(enum.Enum):
Furry_v3 = "nai-diffusion-furry-3"
Inpainting_Furry_v3 = "nai-diffusion-furry-3-inpainting"

Anime_v4_preview = "nai-diffusion-4-curated-preview"


class ControlNetModel(enum.Enum):
"""
Expand Down Expand Up @@ -93,6 +95,19 @@ class ImageResolution(enum.Enum):
Large_Landscape_v3 = (1536, 1024)
Large_Square_v3 = (1472, 1472)

# v4
Small_Portrait_v4 = (512, 768)
Small_Landscape_v4 = (768, 512)
Small_Square_v4 = (640, 640)

Normal_Portrait_v4 = (832, 1216)
Normal_Landscape_v4 = (1216, 832)
Normal_Square_v4 = (1024, 1024)

Large_Portrait_v4 = (1024, 1536)
Large_Landscape_v4 = (1536, 1024)
Large_Square_v4 = (1472, 1472)


class ImageSampler(enum.Enum):
"""
Expand Down Expand Up @@ -199,6 +214,15 @@ class ImagePreset:
"compression artifacts, unknown text",
UCPreset.Preset_None: "lowres",
},
# v4
ImageModel.Anime_v4_preview: {
UCPreset.Preset_Heavy: "blurry, lowres, error, film grain, scan artifacts, worst quality, bad quality, "
"jpeg artifacts, very displeasing, chromatic aberration, logo, dated, signature, multiple views, "
"gigantic breasts",
UCPreset.Preset_Light: "blurry, lowres, error, worst quality, bad quality, jpeg artifacts, "
"very displeasing, logo, dated, signature",
UCPreset.Preset_None: "lowres",
},
}

# inpainting presets are the same as the normal ones
Expand All @@ -221,6 +245,7 @@ class ImagePreset:
}

# type completion for __setitem__ and __getitem__

#: https://docs.novelai.net/image/qualitytags.html
quality_toggle: bool
#: Automatically uses SMEA when image is above 1 megapixel
Expand Down Expand Up @@ -281,13 +306,23 @@ class ImagePreset:
reference_information_extracted_multiple: List[float]
#: reference_strength for multi-vibe transfer
reference_strength_multiple: List[float]
#:
#: https://blog.novelai.net/summer-sampler-update-en-3a34eb32b613
variety_plus: bool
#: Whether the AI should strictly follow the positions of the characters or have some freedom
use_coords: bool

#: https://docs.novelai.net/image/multiplecharacters.html#multi-character-prompting
#: layout = {"prompt": ..., "uc": ..., "position": ... ("A1" to "E5", "C3" is default)}
characters: List[Dict[str, str]]

#: Use the old behavior of prompt separation at the 75 tokens mark (can cut words in half)
legacy_v3_extend: bool
#: ???
#: Revision of the default arguments
params_version: int
#: Use the old behavior of noise scheduling with the k_euler_ancestral sampler
deliberate_euler_ancestral_bug: bool
#: ???
prefer_brownian: bool

_settings: Dict[str, Any]

Expand Down Expand Up @@ -360,6 +395,14 @@ def from_v3_furry_config(cls):

return cls.from_file(Path(__file__).parent / "image_presets" / "presets_v3" / "default_furry.preset")

@classmethod
def from_v4_config(cls):
    """
    Create a new ImagePreset from the default v4 preset file located in the
    "image_presets/presets_v4" folder next to this module
    """

    presets_dir = Path(__file__).parent / "image_presets" / "presets_v4"
    preset_file = presets_dir / "default.preset"

    return cls.from_file(preset_file)

@classmethod
def from_default_config(cls, model: ImageModel) -> "ImagePreset":
"""
Expand All @@ -383,6 +426,8 @@ def from_default_config(cls, model: ImageModel) -> "ImagePreset":
return cls.from_v3_config()
elif model in (ImageModel.Furry_v3, ImageModel.Inpainting_Furry_v3):
return cls.from_v3_furry_config()
elif model in (ImageModel.Anime_v4_preview,):
return cls.from_v4_config()

def __setitem__(self, key: str, value: Any):
if key not in self._TYPE_MAPPING:
Expand Down Expand Up @@ -466,6 +511,7 @@ def to_settings(self, model: ImageModel) -> Dict[str, Any]:

settings = copy.deepcopy(self._settings)

# size
resolution: Union[ImageResolution, Tuple[int, int]] = settings.pop("resolution")
if isinstance(resolution, ImageResolution):
resolution: Tuple[int, int] = resolution.value
Expand All @@ -480,6 +526,7 @@ def to_settings(self, model: ImageModel) -> Dict[str, Any]:
settings["seed"] = seed
settings["extra_noise_seed"] = seed

# UC
uc_preset: Union[UCPreset, None] = settings.pop("uc_preset")
if uc_preset is None:
default_uc = ""
Expand All @@ -492,6 +539,7 @@ def to_settings(self, model: ImageModel) -> Dict[str, Any]:
combined_uc = f"{default_uc}, {uc}" if default_uc and uc else default_uc if default_uc else uc
settings["negative_prompt"] = combined_uc

# sampler
sampler: ImageSampler = settings.pop("sampler")
if sampler is ImageSampler.ddim and model in (ImageModel.Anime_v3,):
sampler = ImageSampler.ddim_v3
Expand All @@ -508,6 +556,54 @@ def to_settings(self, model: ImageModel) -> Dict[str, Any]:
settings["dynamic_thresholding"] = settings.pop("decrisper")
settings["skip_cfg_above_sigma"] = 19 if settings.pop("variety_plus", False) else None

# character prompts
if model in (ImageModel.Anime_v4_preview,):
settings["v4_prompt"] = {
# base_caption is set later, in generate_image
"caption": {"base_caption": None, "char_captions": []},
"use_coords": self.use_coords,
"use_order": True,
}
settings["v4_negative_prompt"] = {"caption": {"base_caption": combined_uc, "char_captions": []}}

characters = settings.pop("characters", [])
if not isinstance(characters, list):
raise ValueError("characters must be a list of dictionaries")

settings["characterPrompts"] = []

for i, character in enumerate(characters):
if not isinstance(character, dict):
raise ValueError(f"character #{i} must be a dictionary")

if "prompt" not in character:
raise ValueError(f"character #{i} must have at least a 'prompt' key")

prompt = character["prompt"]
if not isinstance(prompt, str):
raise ValueError(f"character #{i} prompt must be a string")

negative = character.get("uc", "")

character_position = character.get("position", "") or "C3"
if (
len(character_position) != 2
or character_position[0] not in "ABCDE"
or character_position[1] not in "12345"
):
raise ValueError(f'character #{i} position must be a valid position ("", or "A1" to "E5")')

pos = {
"x": round(0.5 + 0.2 * (ord(character_position[0]) - ord("C")), 1),
"y": round(0.5 + 0.2 * (ord(character_position[1]) - ord("3")), 1),
}

settings["characterPrompts"].append({"center": pos, "prompt": prompt, "uc": negative})
settings["v4_prompt"]["caption"]["char_captions"].append({"centers": [pos], "char_caption": prompt})
settings["v4_negative_prompt"]["caption"]["char_captions"].append(
{"centers": [pos], "char_caption": negative}
)

# special arguments kept for metadata purposes (no effect on result)
settings["qualityToggle"] = settings.pop("quality_toggle")

Expand Down
2 changes: 1 addition & 1 deletion novelai_api/Preset.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ class Preset(metaclass=_PresetMetaclass):
stop_sequences: List[List[int]]
#: https://naidb.miraheze.org/wiki/Generation_Settings#Randomness_(Temperature)
temperature: float
#: Response length, if no interrupted by a Stop Sequence
#: Response length, if not interrupted by a Stop Sequence
max_length: int
#: Minimum number of tokens, if interrupted by a Stop Sequence
min_length: int
Expand Down
5 changes: 5 additions & 0 deletions novelai_api/_high_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,11 @@ async def generate_image(
ImageModel.Inpainting_Furry_v3,
):
prompt = f"{prompt}, best quality, amazing quality, very aesthetic, absurdres"
elif model is ImageModel.Anime_v4_preview:
prompt = f"{prompt}, rating:general, best quality, very aesthetic, absurdres"

if "v4_prompt" in settings:
settings["v4_prompt"]["caption"]["base_caption"] = prompt

async for e in self._parent.low_level.generate_image(prompt, model, action, settings):
yield e
Expand Down
31 changes: 31 additions & 0 deletions novelai_api/image_presets/presets_v4/default.preset
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"resolution": "Normal_Portrait_v3",
"scale": 6,
"sampler": "k_euler_ancestral",
"steps": 23,
"n_samples": 1,
"strength": 0.7,
"noise": 0,
"uc_preset": "Preset_Heavy",
"quality_toggle": true,
"smea": false,
"smea_dyn": false,
"decrisper": false,
"controlnet_strength": 1,
"legacy": false,
"add_original_image": true,
"uncond_scale": 1,
"cfg_rescale": 0,
"noise_schedule": "karras",
"reference_image_multiple": [],
"reference_information_extracted_multiple": [],
"reference_strength_multiple": [],
"use_coords": true,
"legacy_v3_extend": false,
"deliberate_euler_ancestral_bug": false,
"prefer_brownian": true,
"params_version": 3,

"seed": 0,
"uc": ""
}
Loading

0 comments on commit 987cb51

Please sign in to comment.