From 15859f2357059ef488405e5336d2c6e5d246687b Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Sun, 15 Dec 2024 11:03:06 +0800 Subject: [PATCH 01/72] [[Misc]Upgrade bitsandbytes to the latest version 0.45.0 (#11201) --- Dockerfile | 2 +- docs/source/quantization/bnb.rst | 2 +- requirements-test.in | 2 +- requirements-test.txt | 2 +- vllm/model_executor/layers/quantization/bitsandbytes.py | 8 ++++---- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 682f046d4b6ec..c1b6e1bbfe354 100644 --- a/Dockerfile +++ b/Dockerfile @@ -218,7 +218,7 @@ FROM vllm-base AS vllm-openai # install additional dependencies for openai api server RUN --mount=type=cache,target=/root/.cache/pip \ - pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.44.0' timm==0.9.10 + pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' timm==0.9.10 ENV VLLM_USAGE_SOURCE production-docker-image diff --git a/docs/source/quantization/bnb.rst b/docs/source/quantization/bnb.rst index 682938cc63d48..84f805bb60c2a 100644 --- a/docs/source/quantization/bnb.rst +++ b/docs/source/quantization/bnb.rst @@ -11,7 +11,7 @@ Below are the steps to utilize BitsAndBytes with vLLM. .. code-block:: console - $ pip install bitsandbytes>=0.44.0 + $ pip install bitsandbytes>=0.45.0 vLLM reads the model's config file and supports both in-flight quantization and pre-quantized checkpoint. diff --git a/requirements-test.in b/requirements-test.in index 57fddb416317e..fb4179c3d8423 100644 --- a/requirements-test.in +++ b/requirements-test.in @@ -25,7 +25,7 @@ datamodel_code_generator # required for minicpm3 test lm-eval[api]==0.4.4 # required for model evaluation test # quantization -bitsandbytes>=0.44.0 +bitsandbytes>=0.45.0 buildkite-test-collector==0.1.9 numpy < 2.0.0 diff --git a/requirements-test.txt b/requirements-test.txt index c786a1249bddb..3771577fe8ed0 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -37,7 +37,7 @@ audioread==3.0.1 # via librosa awscli==1.35.23 # via -r requirements-test.in -bitsandbytes==0.44.1 +bitsandbytes>=0.45.0 # via -r requirements-test.in black==24.10.0 # via datamodel-code-generator diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py index e01c713dd14db..5dc872933282c 100644 --- a/vllm/model_executor/layers/quantization/bitsandbytes.py +++ b/vllm/model_executor/layers/quantization/bitsandbytes.py @@ -145,12 +145,12 @@ class BitsAndBytesLinearMethod(LinearMethodBase): def __init__(self, quant_config: BitsAndBytesConfig): try: import bitsandbytes - if bitsandbytes.__version__ < "0.44.0": + if bitsandbytes.__version__ < "0.45.0": raise ImportError("bitsandbytes version is wrong. 
Please " - "install bitsandbytes>=0.44.0.") + "install bitsandbytes>=0.45.0.") except ImportError as err: - raise ImportError("Please install bitsandbytes>=0.44.0 via " - "`pip install bitsandbytes>=0.44.0` to use " + raise ImportError("Please install bitsandbytes>=0.45.0 via " + "`pip install bitsandbytes>=0.45.0` to use " "bitsandbytes quantizer.") from err self.quant_config = quant_config From a1c02058baf47be1a91ee743378a340ee1b10416 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sat, 14 Dec 2024 19:45:00 -0800 Subject: [PATCH 02/72] [torch.compile] allow tracking forward time (#11081) Signed-off-by: youkaichao --- vllm/forward_context.py | 61 ++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/vllm/forward_context.py b/vllm/forward_context.py index cd136f43c0c57..7f56575279e9b 100644 --- a/vllm/forward_context.py +++ b/vllm/forward_context.py @@ -1,9 +1,11 @@ import time -from collections import Counter +from collections import defaultdict from contextlib import contextmanager from dataclasses import dataclass from typing import Any, Dict, Optional +import torch + import vllm.envs as envs from vllm.config import VllmConfig from vllm.logger import init_logger @@ -11,9 +13,10 @@ logger = init_logger(__name__) track_batchsize: bool = envs.VLLM_LOG_BATCHSIZE_INTERVAL >= 0 -batchsize_counter: Counter = Counter() last_logging_time: float = 0 +forward_start_time: float = 0 batchsize_logging_interval: float = envs.VLLM_LOG_BATCHSIZE_INTERVAL +batchsize_forward_time: defaultdict = defaultdict(list) @dataclass @@ -40,23 +43,10 @@ def set_forward_context(context: Any, vllm_config: VllmConfig): can be attention metadata, etc. Here we can inject common logic for every model forward pass. """ - global track_batchsize, batchsize_counter - global last_logging_time, batchsize_logging_interval - if track_batchsize and context is not None: - if hasattr(context, "num_prefill_tokens"): - # for v0 attention backends - batchsize = context.num_prefill_tokens + context.num_decode_tokens - else: - # for v1 attention backends - batchsize = context.num_input_tokens - batchsize_counter[batchsize] += 1 - if time.monotonic() - last_logging_time > batchsize_logging_interval: - last_logging_time = time.monotonic() - sorted_data = sorted(batchsize_counter.items(), - key=lambda x: x[1], - reverse=True) - logger.info("Batchsize distribution (batchsize, count): %s", - sorted_data) + global forward_start_time + need_to_track_batchsize = track_batchsize and context is not None + if need_to_track_batchsize: + forward_start_time = time.perf_counter() global _forward_context prev_context = _forward_context _forward_context = ForwardContext( @@ -66,4 +56,37 @@ def set_forward_context(context: Any, vllm_config: VllmConfig): try: yield finally: + global batchsize_counter + global last_logging_time, batchsize_logging_interval + if need_to_track_batchsize: + if hasattr(context, "num_prefill_tokens"): + # for v0 attention backends + batchsize = context.num_prefill_tokens + \ + context.num_decode_tokens + else: + # for v1 attention backends + batchsize = context.num_input_tokens + # we use synchronous scheduling right now, + # adding a sync point here should not affect + # scheduling of the next batch + torch.cuda.synchronize() + now = time.perf_counter() + # time measurement is in milliseconds + batchsize_forward_time[batchsize].append( + (now - forward_start_time) * 1000) + if now - last_logging_time > batchsize_logging_interval: + last_logging_time = now + forward_stats = [] + 
for bs, times in batchsize_forward_time.items(): + if len(times) <= 1: + # can be cudagraph / profiling run + continue + medium = torch.quantile(torch.tensor(times), q=0.5).item() + medium = round(medium, 2) + forward_stats.append((bs, len(times), medium)) + forward_stats.sort(key=lambda x: x[1], reverse=True) + if forward_stats: + logger.info(("Batchsize forward time stats " + "(batchsize, count, median_time(ms)): %s"), + forward_stats) _forward_context = prev_context From b10609e6a11554be61976981304984510a0469c9 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sun, 15 Dec 2024 14:30:28 +0800 Subject: [PATCH 03/72] [Misc] Clean up multi-modal processor (#11207) Signed-off-by: DarkLight1337 --- examples/offline_inference_vision_language.py | 5 +- tests/multimodal/test_processing.py | 17 ++++--- vllm/multimodal/processing.py | 48 +++++++++---------- 3 files changed, 32 insertions(+), 38 deletions(-) diff --git a/examples/offline_inference_vision_language.py b/examples/offline_inference_vision_language.py index 45539c665a922..7bc43242b717e 100644 --- a/examples/offline_inference_vision_language.py +++ b/examples/offline_inference_vision_language.py @@ -92,10 +92,7 @@ def run_fuyu(question: str, modality: str): def run_phi3v(question: str, modality: str): assert modality == "image" - prompt = f"<|user|>\n<|image_1|>\n{question}<|end|>\n<|assistant|>\n" # noqa: E501 - # Note: The default setting of max_num_seqs (256) and - # max_model_len (128k) for this model may cause OOM. - # You may lower either to run this example on lower-end GPUs. + prompt = f"<|user|>\n<|image_1|>\n{question}<|end|>\n<|assistant|>\n" # num_crops is an override kwarg to the multimodal image processor; # For some models, e.g., Phi-3.5-vision-instruct, it is recommended diff --git a/tests/multimodal/test_processing.py b/tests/multimodal/test_processing.py index 6aaa80ddc9fa5..d22d778f81fa8 100644 --- a/tests/multimodal/test_processing.py +++ b/tests/multimodal/test_processing.py @@ -2,10 +2,9 @@ import pytest -from vllm.multimodal.processing import (MultiModalDataItems, PromptReplacement, - _PlaceholderInfo, find_text_matches, - find_token_matches, iter_placeholders, - iter_token_matches, +from vllm.multimodal.processing import (PromptReplacement, _PlaceholderInfo, + find_text_matches, find_token_matches, + iter_placeholders, iter_token_matches, replace_text_matches, replace_token_matches) from vllm.transformers_utils.tokenizer import AnyTokenizer @@ -314,8 +313,8 @@ def test_find_replace_text( result = replace_text_matches( prompt, matches, - MultiModalDataItems({key: [None] * mm_count - for key in repl_by_key}), + {key: mm_count + for key in repl_by_key}, ) # Only displayed on error @@ -380,8 +379,8 @@ def test_find_replace_tokens( result = replace_token_matches( prompt, matches, - MultiModalDataItems({key: [None] * mm_count - for key in repl_by_key}), + {key: mm_count + for key in repl_by_key}, ) # Only displayed on error @@ -476,7 +475,7 @@ def test_iter_placeholders( prompt_repls, prompt, # Effectively match all occurrences in the prompt - MultiModalDataItems({key: [None] * 3 for key in repl_by_key}), + {key: 3 for key in repl_by_key}, )) # Only displayed on error diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index de5a002d474c2..ce6bec1d49aac 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -403,18 +403,17 @@ def _resolve_matches( def _replace_matches( prompt: _S, matches: Sequence[_PromptReplacementMatch], - mm_items: MultiModalDataItems, + 
mm_item_counts: Mapping[str, int], ) -> list[_S]: out_seqs = list[_S]() prev_end_idx = 0 - next_idx_by_modality = {modality: 0 for modality in mm_items} + next_idx_by_modality = {modality: 0 for modality in mm_item_counts} for match in _resolve_matches(prompt, matches): modality = match.modality - modal_items = mm_items[modality] item_idx = next_idx_by_modality[modality] - if item_idx >= len(modal_items): + if item_idx >= mm_item_counts[modality]: continue start_idx = match.start_idx @@ -441,13 +440,13 @@ def _replace_matches( def replace_token_matches( prompt: list[int], matches: Sequence[_PromptReplacementTokenMatch], - mm_items: MultiModalDataItems, + mm_item_counts: Mapping[str, int], ) -> list[int]: """Apply :code:`prompt_repls` to :code:`prompt`.""" if not matches: return prompt - token_id_seqs = _replace_matches(prompt, matches, mm_items) + token_id_seqs = _replace_matches(prompt, matches, mm_item_counts) return flatten_2d_lists(token_id_seqs) @@ -455,13 +454,13 @@ def replace_token_matches( def replace_text_matches( prompt: str, matches: Sequence[_PromptReplacementTextMatch], - mm_items: MultiModalDataItems, + mm_item_counts: Mapping[str, int], ) -> str: """Apply :code:`prompt_repls` to :code:`prompt`.""" if not matches: return prompt - texts = _replace_matches(prompt, matches, mm_items) + texts = _replace_matches(prompt, matches, mm_item_counts) return "".join(texts) @@ -470,9 +469,9 @@ def _iter_modality_placeholders( prompt: list[int], modality: str, modality_repls: Sequence[_BoundPromptReplacement], - modal_items: list[Any], + modal_item_count: int, ) -> Iterable[_PlaceholderInfo]: - if len(modal_items) == 0: + if modal_item_count == 0: return prompt_len = len(prompt) @@ -499,7 +498,7 @@ def _iter_modality_placeholders( ) item_index += 1 - if item_index >= len(modal_items): + if item_index >= modal_item_count: return # Exclude overlapping matches @@ -514,7 +513,7 @@ def _iter_modality_placeholders( def iter_placeholders( prompt_repls: Sequence[_BoundPromptReplacement], prompt: list[int], - mm_items: MultiModalDataItems, + mm_item_counts: Mapping[str, int], ) -> Iterable[_PlaceholderInfo]: """ Yield each set of placeholder tokens found in :code:`prompt`. @@ -523,13 +522,13 @@ def iter_placeholders( """ repls_by_modality = dict(full_groupby_modality(prompt_repls)) - for modality, modal_items in mm_items.items(): + for modality, modal_item_count in mm_item_counts.items(): if modality in repls_by_modality: yield from _iter_modality_placeholders( prompt, modality, repls_by_modality[modality], - modal_items, + modal_item_count, ) @@ -590,10 +589,10 @@ def _find_placeholders( self, all_prompt_repls: Sequence[_BoundPromptReplacement], new_token_ids: list[int], - mm_items: MultiModalDataItems, + mm_item_counts: Mapping[str, int], ) -> list[_PlaceholderInfo]: return list( - iter_placeholders(all_prompt_repls, new_token_ids, mm_items)) + iter_placeholders(all_prompt_repls, new_token_ids, mm_item_counts)) def _apply_hf_processor( self, @@ -655,10 +654,9 @@ def _bind_prompt_replacements( def _apply_prompt_replacements( self, - mm_items: MultiModalDataItems, - hf_inputs: BatchFeature, token_ids: list[int], prompt_repls: Sequence[_BoundPromptReplacement], + mm_item_counts: Mapping[str, int], ) -> tuple[list[int], str, list[_PlaceholderInfo]]: tokenizer = self._get_tokenizer() @@ -675,13 +673,13 @@ def _apply_prompt_replacements( # of the search text in the prompt, we instead perform string # replacement on the decoded token IDs, then encode them back. 
if all( - len(matches) >= len(mm_items[modality]) + len(matches) >= mm_item_counts[modality] for modality, matches in full_groupby_modality(token_matches) ): # yapf: disable token_ids = replace_token_matches( token_ids, token_matches, - mm_items, + mm_item_counts, ) text = _decode(tokenizer, token_ids) @@ -693,14 +691,14 @@ def _apply_prompt_replacements( text = replace_text_matches( text, text_matches, - mm_items, + mm_item_counts, ) token_ids = _encode(tokenizer, text) matched_repls = [match.prompt_repl for match in text_matches] placeholders = self._find_placeholders(matched_repls, token_ids, - mm_items) + mm_item_counts) return token_ids, text, placeholders @@ -737,8 +735,9 @@ def apply( # If HF processor already inserts placeholder tokens, # there is no need for us to insert them + mm_item_counts = {m: len(items) for m, items in mm_items.items()} all_placeholders = self._find_placeholders(all_prompt_repls, - prompt_ids, mm_items) + prompt_ids, mm_item_counts) if all_placeholders: prompt_text = _decode(tokenizer, prompt_ids) @@ -748,10 +747,9 @@ def apply( prompt_text, all_placeholders, ) = self._apply_prompt_replacements( - mm_items, - hf_inputs, prompt_ids, all_prompt_repls, + mm_item_counts, ) mm_placeholders = { From 96d673e0f897aa8eec234e690c9c5425782d6ffb Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Mon, 16 Dec 2024 01:59:42 +0800 Subject: [PATCH 04/72] [Bugfix] Fix error handling of unsupported sliding window (#11213) Signed-off-by: DarkLight1337 --- vllm/model_executor/models/llama.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 733b1bc7d80ac..2902e6999c2fd 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -169,13 +169,15 @@ def __init__( ) if hasattr(config, "interleaved_sliding_window"): - if isinstance(config.interleaved_sliding_window, int): - sliding_window = config.interleaved_sliding_window - elif isinstance(config.interleaved_sliding_window, list): - sw_idx = layer_idx % len(config.interleaved_sliding_window) - sliding_window = config.interleaved_sliding_window[sw_idx] + interleaved_sliding_window = config.interleaved_sliding_window + if isinstance(interleaved_sliding_window, int): + sliding_window = interleaved_sliding_window + elif isinstance(interleaved_sliding_window, list): + sw_idx = layer_idx % len(interleaved_sliding_window) + sliding_window = interleaved_sliding_window[sw_idx] else: - raise ValueError(f"{type(sliding_window)} is not supported.") + raise ValueError( + f"{type(interleaved_sliding_window)} is not supported.") else: sliding_window = None From 38e599d6a84bb7477030a5488035cd23f529b644 Mon Sep 17 00:00:00 2001 From: Kuntai Du Date: Sun, 15 Dec 2024 13:31:16 -0600 Subject: [PATCH 05/72] [Doc] add documentation for disaggregated prefilling (#11197) Signed-off-by: Kuntai Du --- .../usage/disagg_prefill/abstraction.jpg | Bin 0 -> 104673 bytes .../assets/usage/disagg_prefill/overview.jpg | Bin 0 -> 177439 bytes docs/source/index.rst | 1 + docs/source/usage/disagg_prefill.rst | 69 ++++++++++++++++++ 4 files changed, 70 insertions(+) create mode 100644 docs/source/assets/usage/disagg_prefill/abstraction.jpg create mode 100644 docs/source/assets/usage/disagg_prefill/overview.jpg create mode 100644 docs/source/usage/disagg_prefill.rst diff --git a/docs/source/assets/usage/disagg_prefill/abstraction.jpg b/docs/source/assets/usage/disagg_prefill/abstraction.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..1a99e3ed8cf5f3b6679196752896fca94a22a4a4
GIT binary patch
literal 104673
[base85-encoded binary JPEG data for docs/source/assets/usage/disagg_prefill/abstraction.jpg omitted]
z)maKSWd0|A2zw{U6m*i@fy7ouvkPvkc(^hPH&d8LzHc*jAa};Giy9p)zvjIH^%d`i z`OF&w+xfukK+R`k*Hd+;tC(+G@1TAR2fzWweJFrN$9s43At4GT35EuJR=?yKXDt1J4)S&zHmvf3})(wGtqEp7KD z$(!szil3tJIXe(uvti=UHg6SU3r;TWylE;%ga%NS7#wGqbPV7?&6G!daE7hD-GRi5 zvrR8!BxzQeuw_m3&qY*sW<&G_Gxo_21nEjwe=a{{5sjNMxxNEA>a+vFYB5D`mEq7o z#O!J7Sb{|i^@djeABHOXS2XP9svq{*|Hr}pNW@vwq7=%%8te~5q_V91a83KeP*(qn z1~)t4|%R`Aq+TC zHoSSWD_o{?66$*jYFQC|M0eOi zG)BEN1@d3DYoRHI^gCRPYxYat99NmDJCM6)K{Ar*%SWz>I#@vdtM}6_JCMu)6d8tM1o+vno09f(gRy-$zp`7bf< zK=#@HvjSBA+cD_K|0c!I+F+xuFj_kdk%MSirZDn7x_+tWl_73U4X8JGvg^@}qK0d3 zdyKs6+7Pd`Lyx`-0A9<+7*Bp>piI$Z7eV=GEFqCDno2dJEX5#{ZV?B4PragedIm@b zf0+H`^HA?e**V!f{yN>bTc>Zo*DP%mF=gVIysk70gdp&QAjpd9GB@MJ^vLny zZWMpGh878ETZm3P>FsC{89PVxd(~bi)eYU7G!>!7ate0w@zo=4-oI-^YjaV1i6RFJxjAk zumZPf3bs!GR3}=5?in<%D;{kba%|z}$%p#8m71~1q0%lXHv{2=A&Ea@L|M$Gy~1Yq^$^X+AMPzU|FzXm`o#k(f}Ds z4PiV&3Vf@Pt0En)`D&C=rjm}!)ANxWJujVG{xZj0DJFI=;l%qANmgsj-17P7Bf-Ml zv9mKX{bH6#H)7h^D>oq;9z7@NnT$5r{8Jm6403diAK>$y?^y`Zq`n<>qh!?T?!wTO zW?V|19If#97`(5|zGER+Y(r_Px`p1VwJk-nMZojs1y{Zk(HT^6jc0~;e#kH`8Jp{7 z<-yL!SMYp^zg0pdUUtEGNi&bGqQZ0mwPun;8FM-xOH!c-zyyH;BF6yW+jChJT@AMo zxJOv#zY$Cs3JNl@{qnKi=3YZB%WMYrIg^F)M3GSko5!z-ApBcfJk!t8^(Pu?g4kpt z><*0B-YhbxWqWb`@jf#vk7l_BVLx;tjO<_!XiR1>*`ra^5->yc`=sSY_Q96Vet&LN z0u1UjCe{_Y9DVWLlbg+UG3z+;ta5te8dd~5|5Tm@ynOzFG-!g@`HV!hq#pyG0V7tyIMNyM0-oQ@wIZ=ysSfB!SN0v+a;six*(GUMc`t(8^Jv`mrYsVzB}*IkJfooM7#4mL#a=9cg(j)gUO3~ zqS!ZxuCtH$iHF{EjkG-=E!_`ImWufyg)syyb|A{x-DoNY$k&e|BPO`W1luP-Sxi)R zz(ye;odVjb#{7_QbRHowNk^~6Oa|!a_xuUEfkf@|@o~?+bPDyZc)a&FZf>`4Urdx! z(7HeLjUFp7vOguuS}ln)hxg8?k{Ep%)n$u>1;Xp`D>h__d9IaeVy5s%z;z)5TeU0% z^z=+#4Qktg(3PH_cc1BB5I;D0d3iwgqS7m0#}pQbV7A^@aF+1XRYbuacnXw!5}yc{ zG&#JGOEHa4Ox51>7KAh3O*Uu|KhJfPhTZ8~vRJp3t*-ag2_Fq|kl4?-yw}md;E8dI zq$q~J4)x3t3p$TWW?J;!6WAa@Kqs?zZTe+jhw9ifGg&+G!0}fU`J8iCwd=p-)}0d4 zX0Lm`-(2}c!Xs7wB;FSgnbe;_)@wO*{Zv-z!HE{61yeLcv8B99{M%k zasOcJJuSDf>b>q+-?@+QF{z*D+m(!^wbxeaUTh6fBuACT-sFx$Q<%rHu7s$bA*o9{ z5DC=$W4IjbQUN6bc4r6TKg3gYM=wBNv$?>v5Z0itAtu$dbbV)L9Lj$S zdtPu>i?9nzI!Z$Uz6(2$3^N^oh^!V&(lh1qp+OTD?qyCZ4A1om&ki&vS(=@mcVuZsnQ)Z)2IOm*P) zLP0j(Za?!bbhowBFE%P&7cz0FPq2RWz+S6vZ2IX_ z)FKIBtMn5f;g$AM`(;r6CB8y~$dCNCd+(OSoUPuMEXp>h7l)jB7CO)(z=iDY5MacD zM)1m<`hnb>*>3eXIua%T<%lMFk#AVZ4ZozF2pD}}(WR>1B()!{%P&z$gliyFTPI$?w4wloM0z$f15fp6 z57~iK44#_dU7E#`9du_!PBY?B7h6GNDU#3ZUB}fd&sa_QwHXAZyR2)SQG1}6$8gKO z?2R_qe{Z4#-;_7hoPqGYk$|hTKx&}6x}fVBL1>yR9l8Ul3cAF2Jc|7czq|dQ>$zIB72DM*3gM*KU z*Wtu~DB3NA00q;(2B>q96vD~5QWwFP1t|xx;p9cdferAUCa{|~uL}2Gw$5_veO}^N zb-?$sRBaWt2_RLWx~w9qp-J-W$;xE4Zs?snxJYfj22VP|ej)X6@o30{G#>G}Ipc>c zK74+j&`3;9fZfnC8`T(kpA_#+qJ~!D5NPL&VNF%H{3z%WEyW5Gw9@T~85IQn3DL_4bf%0;=8{O&SEfvj+oX20=tq1ZpD4)hAw& znI(^-eI`~t-5Rwjn{s6$asvu4h!5-h3=oaaIw}I)L_7lH7Tdj!2e$&sn~P;f(+)g~ zm2iA^Q)m#i7uwXE$rOynQh`(|3$RoLP`}R`cEdRW4cWSxJayyxf~1gR%kc@Or#$U` z3VWDediogA_etThR||0rdv=!^VG&2NC4!69#=Kx0Qy~CepP5eIWjY$jR2|(+*U{E; z-cWkc8vAzjXK)=~YGBS?D}~GF_dYX7lWpUB@Y8+@OAI58(jt)`V8Tp;WxVhT9ZG&l z$jj6hGuqNEL zZ1m#xb$LT6`{|2Lk~2qiV~%Z|ZHPmbGbeT+GHbAly3>g`Sc667ef@_fa%EnZ>ZaN& zKk3-D4CQ@^yA`t?7ktA7w=zt*f|_BYh62|qtKaRtRpGLLuN8%Fs=VXIOw*G)$gVRB+CLn8 zjC~5Mf4n?gtJIirahAXDVRj{6d{mCDG1`3c6u$?o<-2x~?oEMzMJE*9`SdccXi7K_kn%NfkgDGO{nYE3Lu zYR)n+Jk{5HA~NgBiTT=(BYPPtj5{b$jAE{(m#t5>X-_Z`BF#l=S1EvGp}v86BrQS%Q)zQXE$rlP_IT~B78p~xln znBLMYVUD!k96Yuvr*@*q&qi(LLhiA;IDY3xltLV4sj79XPU`dew9v+1`DsukF{UU?>Kpl2a+@pdZh6!k!d z`!~~EH+U#Y)EA68Gf>&OU^|eDu$Y;Ss;2;S0gl6n;O0@ggCtzS{HlAZW_Yqz7Zo3A zBrGf{Wqt|Xt*mmr3tEM$uc8Z!P!BS)nR`JEl7jiCdZT)7EXG~wL!O(|Rleh~k&|Z> z=lJ^Te#hCdb54i2q{IyBTw(Ua*g))9N$?H+u?u3xZf8@N0kDhk>1Kj1T|9e%s}DN^ 
zr_LI7Xp!*=?S4$hKI%;+p`9b=TM{}~y<|~iErGy!okAID>dBpJY59OTlZOJ@Pb=U}^FEMFpR&8(QlvT+8O=d1(5kt(Twa4O~g6ID=-U!&e(R_pDmIF&RkvK*(?b^s4}2a zRb>~*9lI}0b}LI~X9TCaNW7BDeose z1oR4nPIk=b1c%OvUH5(Mc>bPq)Bfdytq9*9_zN^?n0A&qf)-$c#pbSK3S{gZ#C~!t zJ;;db4qexzEg3t}ovE|^ZS`1*@TZvl$DU5_xsWD@ZyY=GMJ)48Bja}aQQ8USI9#a1 zh&g9;F&z^f{-f1IWqub2lr;P`-*g0Ay5_8vlf`sHXW29*dy^nx_F&;THjvK2sDfS0 zjEREPTamP`R|455$>~4x!{YR1-18TLFuOG4n-aF<)psBxijZpP1ET{OPi+{xF&vCC zCQBf`p{mCfI9CmzdwoQja=-8;>ze4I7zK!K&QFV5U9w5DWic#DEu{=r+&q*ngr!~{ z4~B(grb)L;w^F!z>|<{9W9RJ?fAXH_31igU(8Z8)ta}>;fur2T~tdu+N`dnHd%owby?!pPc?v*q`s^wRgC1Nb{LNWJt6wlV_B66mjqa zDi|GGwEN}A1u_(8fSqWU%Y0I%ds6%X88xcD@u$Y$57RO&wBj@K zQ0H~;_-Hc zq+9(0fX>0>k?)~A@^4|gfm|)ohLB@tIG!8)ynAZ)lf&yRrQmb+hs1s^ioze0J}jEB zmCi*6b>5Ij*wn@bj5d6?b&r&-1f4D_jH}FimH_MiGny10=Kn z%5R7RSiH6aNyhUuZc@+*?I)ktPtAU>d%wZWe18hrOK5L8WlaM@Pc!Vuw*9|G)fI{! zGXKzF7Oh1Z!fG4AKeh{pL}a!^N6_1IL_j|SOy0&-@&5Kcf+_7 z`ogL&Y!_%F9%g%(vmV(~f}9|v#4IG-RCa%WztuFcAXS*JUEreq_(;gs5Sf?{#@h4c zuoswg)Fo_;fjE>aw0N{b5rFm@*{8NEMP2)%bzxlZ$@sXMA4l8)OK*6X(&C}u5E#nA zq8rzVC);DD)qEHaQ5W42Je0d%x50Ge+>=Wk3Y46=2k@228#N-(w$Yf48Q(Z?)_$Z!)_=->Oio~eelQk604rg|yCciomsVcbK3NY1PwRe|i^H5mD`2TrcjTLY6}uZh}_&n=B3 zKBk(V+Xp$97w?N_1`6fC42~RvOnwjiZXxQm7cA;1#EUu{;rQGB$HLF|;y=J5+yb$` z3+I2G>)-?ZLnd;-M~6|3y+|a4#h>Z&Cl2ukdAKIiwXQ~J9Ssw&F8^3QoLngO;P_i9 zc)*a-SNIKwV;=d#~8HmZXKsgtyEK&4zV%Xo1{PQDK3(KI&w6=dvEU;YeQA z(I-LL2RuVrEst38#l3MzyNEr%3{KF-Y$y5!t`{Zlt6j+r4!>WVCs!sg$%!2Kp>jLpxb9hJP zw|g^}!hxN6>wd->6Kkpi6#j84>t+?#UZ_UgMTKtZmRwIC3iFErMEyT;~5|o_x zwL_|fT)r>)wA-zOFV|Wk_ix>N_mb#k_%P;>q|4un`m>MKRgVacbt z+hK%hr2N+?BY3O{lu-Q!Q__(8v@v|UWryjvn)|GvWgLAMTij1qj61+qJ(N?PNS*eXo0wc!Te&rFpIRgdH0l(p;L@3j(TROV;iF4|AKWgbRSKyte|Q!nSa@Vl zq-wj;yenMfl8JRmNrZZ4N=TKQ`dwh?eqNfBGcf|V)s@OrZD|> z4o+-**rL13Lex?6H()5sAL_T`e3<~fc?WVErHTEATlt%N`Sp%eLUp-=zP1qCU{CU0 zSmUO9GVV6MGVGbtzGFFo5Wx&RIdRUW{c!DZ^ZQ!-&qijg-H9iQ&CYElXi>kU1WwsO zc|nllMLIqSHD73^%t%FY^aKX(EfSEI81^lEXHb_^Vy(#Ex!=jz;e#-jFT4oL+765YGOzbjFp&)iq(@d?4jmKcvRLc4RAgTlWPc5?Q@zNkkoEGG6a&j4RVYl z43Wxcd%;!YI8qTg2CrWt70?cUWpbkX;ru41PdtNMq)D3}jXWqAZq+doXyRJ)EHvFk-3X5*2p2z^EtcD@3vA+4pMgs+87 z-5BaZngPxfC2w>u^#&?sZS~qLTJTcdmV3go;~CQzPmgzS)^|V(%{laa}M&bO7SFKtn+(nbG3YL?r=-)o6=XQug^X3&2aaSG>h_2 z7Ml$cE_>a2P{2%G75iS*yv{fc6n7(@?9ULjxZ8B4j^Z+^@%Ykf+EErhY$v?GXjlTFwsDxi5z}R zSzoQEl2+e~;P=6PpBn2Q?h|`%Jrjc9qzKZ*WqNlYBJKL;XjVWNp(-AsRPhi+P-q{< ztm&>bhJAX`N0rRixAogFc)3+&9L?5jr4?Js?1CCN+Syme-SUTl zn-qJ`?B3=M35wXqZRhvt)gP<2otpBv+57?55ZQx6L#LpktR}Fu5-{Eo{U`W$L zzK4_Dpwm!7Vcv=}x%mum{V^bTbmN)HVIs+{7VkXre-TftZ#`3_i3*e4Yw_+7BdxfZ+LCb-r#?9BR?y}F81i}+>A>k7)Ppb z3U2>EebKV4Z$NFS??X+N?LhY1v8?DhSnPql6JZLv>_F~U0$j{DE;(R1;?{ZWT}>ty zHNP3Qbs{VVwUQGE-;SEl+F~sh+!^~Hm7<;x=n%H~anq(_^1xcc!P{y4kEvB3EG}tX zW$qr^fuP@D?a5{TI4nOXkn$0Xcl2RP>tUmGFz5RFA=|(A$wp>6Ud)UYIq1WOUeG0R z5lMz!9%3z9YZ28`!6id}F86D{9O{0$m^{xvNf=53o%Fo!ZIn>N6S^9x7VCgJMU9&N z>UNWy`Oy))?FZ2xj4vfOrMG-dLiYumKA<>DHkXR6ppG`UZsh2a_JfcC8^Sxef*jVJ znE>amd9WzaJM&|E@Y|D3=^HjIN~2fCs71eezjq+thLa<(8F2oFyd6kUq>pG3!K&%2 z&TeMThv&{k!%8zftqchV`1>_F7<>V`f(%*L7#KMf#k4JBih&WFL?Saos}l-XfZD4v zx*d;N6bunq`e9*#GRBfIARFzlAK^X_5IWc1fo$ny0Nf+TV5OQ`Tc5zbY!v9W!I-ivh0Mhi3fqCaxrXWnZyyWdox^?6APm9{u5zE-%u3)} z=CkIl459xheZ!l)Af_unBR901lEREa5#ZqR5{+73Xa`+(90;DTdqaOm^kY}%Y3hen z)Nv=*jhA3s%MOIe*Run$5}OyKE8;-_2So9HL-nm=*!9Gw#H-tirPxCJZ#e&FnSKLh zdf&9c7(FZ|MqdS`_8Y!oza#rJhF(G{`N&Lqj9H5M=b0J)E!F=`(-k3G1cFwv)EV7E zKiUzxaa;JXD*0Mc6~(hOU_1S<8e(UGI`jc?3hW9@Hj@eKc+~(LrWnn^wxSY3%+1@f zr>~^%hPgwi)%U;f}$lo*57Gy9wvp zlKf+Q_JB3<78qfd{T9982PmuVz6aM_mgovkb|A29B${CLkHaI9|Mn*l|0rPBZv4Y( z)WZAbq&($R{3H%^_@!T-!l`|=<8HMGMRiM~H?|DFGDZJ!lyGj?6HQL2qBHHbQSs2f 
zA3^?G&Le$6@ty`t;5ApNQUIKwtu{~1EjkF)Q_l3}`UsI>i5MM@SFa{YB6|>%0{h;C zgg^(i(jX^ToeYj>O)+Fkv8)X{kOV0Cat-6@To7EEDe^V%z!hp!heTUjf-cq3T{ZcU zpks=CaW$U&1U5e}xUCE416{yfum|z;)678mCD;ULJ^N`x7D#?ncR8pJF|g%V2whP3 zj}bxBdJLMD4d~AxsNov`Xs>bcIMCBLkd!`}HCt_E&fU-=`!dDU=(3;*TYM*=n8FfJ zdX5!c(;hg)#!Q{;27QdO4s1&k9E-5o278Cx7>3`0{)QMB!4cKf3H{;IA+ZA?D1?KK z$LXf^Pr;gDrti<96gucHe@6+ZE!j)~(8HB(l0j?b;mc&6D4VfrxfPnZ0d4=`I*RdP9x#?lv7Emn<+sA2-dlxwmeFx*reXl-^?yUj zZ)DSud3pO2^b73zGNvo@pDFpBY)s26$hTTu8Yup@u%=s{rS(A-jmm))GBHi>CdJJ&j@MRw!Jsc!^viz2(Wqzc33B%9;ngf!Amvl$byWl8Zg#6 zwcUr>Q_^bqzS8#z36OSTW3G(-qMXT~i&pHAFL@oc!lQK5^ zdYIMNV7U=u^)0q*90*BQ^*kZW zK0FXtYx4EMx!l>+Vj}O$IBbiDd}1l!YtcwvN+2nW{-8S^qY^fs3#~IG**sNCqk9s#+f9jl zQ5!WTX9#&l28|(yzG`~Stf=6s6MkYI{eWkHvIa3%z0#$A=p{xil6~f?$#CgWB12&O zfbH4y+Ln4gujJ|`#JR%Qo*_*Qp}2yNcE=JVUrr3 zcgwUK**7=k<@-RV<`K`H98}x4gX32YW1dxSMKh{F%)3K#l&OU!O@b2qIB^c}ru?+` z-gvRAO(Ny2kH?TpqnFb42i@->5r;lKQnK211}_@|U2_GXOC>*_=n$bFA65avDLtJx z&r(pgH#z(Is>vLK)v~ur-=DlATm3=o)3|N4TiV{ouOf{V9BcxRwZj%-?*hd7qJJ`v zB=11H7nl(n)L}W7dcs!)bH$LdB*Il#e&xULhZ08|q6Mc9fiMTVLR zxHM%hKzZFfKTd1YErZUldmrTyNO^5NwNxe$?juO~&g9jOBI{E313?slA-mK*m}JM^ z03-8gcz3SCoo_}RWr3o#U7SBNM#Qg-9DeQ0ay7q^b0WUK;L)< z7eH@PIY1=g5;wg?7Q`RVNm4$|6;=-LPJcJ7OMGE4B9m3ug+;)O>dqH`UgO2uqPk9C zFJXn5{qVZA@RWngH;tXmDKB4d@i`OcnPnRaQ@g8=XZv#vg&{6d zZ^9R?J7a~&y;nFjtI|N@j6n+^!~!YM1;O-d+JZ;Nv~$--m`o;Lw9eUU8)5{VfiIl) z4uctS#>?}E1U%8RaBl^<2c+v5c$_>}$b=P0rJ zr)=y%#F(R~djH!9l`!I(6yg|pBCll5YtTOo75;RlqG8hDoo~MS=K~?v6(h71HB`72 zV*14S4t`XBW@p^nAv0lO3QcWDx19KHlP}n4b&LNsb&&JZ+}RwPV+Geg3hei>nupG0 zQ!fJ53tLPmVjqx5!0i4$G3WoVDzL4haqxrC<~X!?UcRwbjfh zM(zh&Nq(iT4Ih~UZ5R^UO)`v)`l(4s#uuXi}!Dfpq7t%2Ap0)C`Xx$Z1?R77D zWq|-!7LHwv1`}f%4|Bjr9Uxtq@m_sdF=cnPY&k!E+wINMA%RcB=8xVeobJ3Qyq`_h zHJ3RBZ8VHG0nr8aC9v_plX{slR~S*dY$lquK`N3OFKtDiY|Awr{$lIScZ_cEj-(cd zGj}TflyW-sQ|9OitqmO2kx|>R>nrRwd6N z)$NV?DekW2qdvvA2LOL+5hK4nkhp~ujW%A8NN9wMbiYq8pZ?Y~>a&(4{7u1zFDpiF zp1)Qg{LYDYUYNR7Cs!FF7R=$z#DQCMcM1$XiZHCOJl6Q7I!Y|3#W#lQ;X%dESrDmb zsCcwghY%v1jHjzYIZf;g20I-N2$g{x3QWPA3v_p&+dgbN20aUiz)eZaCMFiM)(2A6j!kOqAo$HZeVpx2UF30l&$3A+@`+vAV9MxwCO>XoMhqm0z=XDubcuCY(iyD;q_^ds_ z3VVmie+*n=jlh?G?$G?to@-~WF;6(78P~x?xrFxz?*d(+SDqKxv`s!HC%;0yN=RMo9x6&}b$_RoeTLu|0;u&^tw}9bQLe zd8edFbgRXzFSklc9C?d*EsxdQz|lnqdh^g^C+O^eEvP$g(UD)ykP`{#r=u|oA9LS1 z>&r|idLKOQFH4MXc_2ubeH|gmxKdYFH?neeY zztm&p#2AooNJYQ_ z{63Wd1{sY^6(G_16&V||;$qM-_Staz{+pK<_OtcaY8~9`pz9c!HgWn%#NdYzsQWPd zK2i>pOl)L!XTYg+?&yK%iEwgqL zwmhWR;r**;CMawM1!BH*F(@H1rk#sC)SKUzo||1;V5f=mY;J0#9FSa4owxKlo*MWF zRh~|Ju{%nsTC`TSYHbtohN4An0PM+kh)Z{{{+OiF9mvTslPLZwtFgrrp=0_$#3XRC9Gdz0|$Z z9{DUY1E?;AL#Gl2b-x~9J#-aG(QeYAf;0_H(KM0TZ%hxq`5WF5f_Y<*QHR6Kg2yI<-Hg}bpWf&1FM(6u)G9HQ!K zn2Jl7j%15#*Pax|N4_n}NWw5YJqk~DrJ-aHds2x*D`jN=KD4NbO5laC##bja_KeJr zx+vpKw7zVmU%d+HUnV$K0bX}4ZuM0lCz+g-u$8GDQRA&9HWA zM<+LTofSb*1j3h9wFXjO}ZgmoD$9lg*(8NnS-E)2_2H(#1iYREmarW1V+TSTFdn?2#|0cIOrUwORPm%;Flk?af zLDoPhpNVNrMQ#~Ud-u@7E+t1>)Aw1{mJjQcZ64Of{$#u#?}@~;fn|gcDW*7bP+pNy zg$jnU!4WF;A5<@dXnvvMonAgVxc72W$H2Nok&6&c{%X-FE^BHEDN-)8`TKxL1f`i^ z-vd4=>f2dlI#DvJkCOWIDkC{a(R%QDxU`hCo_bnw97o6bGFGfU2`qU$MQW3s+H_OF zPEx^`Ic?w^d84;ke4;HXB`h=q98jw4&`raGv+e-dN1;|16*zjIoal)iV6zCPMZ^=5|=3aMKUjJv0~qojks=uE0MC6jcIFeu}2jm-VB zc@l9rnxnG9_sP_Kk!icTZ(ry>h}-uz=d4xnR$f1v?1~o-K=Tn9r3gVXSHGDO;2Owo zD2++!{osC7*kE{XuFHmQ;Z1g(lSt&0;zgG|aV#kjifxHnut0T85;g!kTSdTkhQ*n2 z00pIA4d)JEfs&h*fpbE`*ZPw~^^FUiU96o>ygQeEJecQ7-<2cmpQX|BU>}SjWvwrg zsaOx-$omho%a`@UT^-?f?WGVt;3&Zfq;hPztc;_nsI%^X_p-PLT(SrIpzD8*`9 z(M3@Ubs&t;vzLAr`jZz-u0B8APVH)AERhn)d}kUd%?T!c*I6Pm z$tAg;KK5gNnrmH%=}8gIp4xls6t{`M_&SDUNAp3LFz$l+#jfboh020zv-pKw9&)O; 
zeBO=?`c_B*Hkfj*pZmoX%!Se@II%5I%5vuHKEkzCRc0aglJ{S3lS!} ztrllFD-T>rpST?)v;0+&D=oJCg3zewHzHClnhQk3dE41a=?BOscZHNi-{{|E)Gu+b zB7j7?YbBj`#f{*^pY5F!um=PFJb=3WiaBvD^#(2x8>~A;Je}B$YJl~~4iN^O2z$mu zEKla2DtVi%9m=Bl^1afqVYPXu{M(Gns|wGcHnDfE zO@?*K)StT#Woewh%V}llRjsALVLhLbvvDXz;Qp4>@-eTsU*f}kOhIr-5j7c&EsSjZ zV}c&L_uos(!4jf5?5sUay$#2_tBpF|NGGAXF6=-S70}Ps827!wl8Uu>C5rE04gK&9 z=!*sii_2&Oq!X&^GImh}FTDd<5efvK1Ee+raB@#4zOi}5yb=M{L}L4q!}icrz1-no z6Rp~1m)wQ2@Y!mD+rQfcp-j5&8NOE$B$5um?KyTn~#%D;<2lbUq zDL@#lV4!^zRjjCX(XIsKD8?s%4e7}?qL1EUrKn5ulK{OOj8c_ z`!DDxq+X5U%)TCV>BFgfJ>0(OkK*}Zhf6q!!nAZr=M@c9ByK_Z^X*&}VI5 zZkI%3EMAhaDZw?HrUxD4NuevN3+^?mdvYR8u2{}jmp##Z+Zijs-S_L487xuLH>rLt z6fmbHYA|I1_svXBy8k%I1csc^f7m(_LaN#8Bl)aiy?ItS_}%L|G3UM3Z2An5jKgAE zhGZO-mr)7>Tir4JaAC015SuI=dXGH2V3l;1Vwi+5e)?4x6{$mwYxA_tY_Pn?7w;^S z=69FnXH&`pQ|Z=yH}tR0qo>WOmVguu4Ai=uQ9(69)cE0$_H*TcZtsjqKJxa{?mQ2h z_OTjO%eMR(i~WPj;YSzymHD44pXXc=qQXdtPno?}k>f^cJzZo~XeL~mq{nWMujx56 z7*rlG{N}-8OUTi__`3%LKOM?^f7Spad1ljU1xJ&{BZr_3YdFk|{8F#OHwq$P{954& zZPO6`U6ttvG#1oHwrW-JOBZ(EBh9ACQsz<8*mKaC6g;(W0vjwiHMG!#p1)<@pRb>r zx?pPTHH>WvQSpXdj$Pmo>2x{1>!(c@?le#J(kag$x`l_sDl+e(=F{?{k~-v>AMuU4 zU9IgBe&hk0h8A*DqhOrg%beD_(a;FV+L1Fn>5eKJ!gGp_`n)Zj^N>RrKyhisou*5V z2DFGn$yjJR)R#y)VJ{zzF>kaNw$wNFHa_LKr5vzVduf+gahCgqT+4VDt9w(qD==Hw z`OGQ7KJ1G`k{HF7nn-ExO~ohW=k@VToiJ>C zx$YV8*236s@0AcPR+nJ;oNwqCsNSJz|9RcMwRjK*A|=y|15xZ9+JScR_vW#?Jp(^@ z1sc5~T2UQ?8*QV<(PQ(=* z8f*7Iw_s{$m##c&_j5XBR`Oz&Uyju@Maj>wY&Yv>Z7{P73oe*AXew3!5l#!Cdwigu zDWUJB$YE3TYIJ)#Ktm3CWjW~iI(NapweY@j0rYKJ$e}%1`xBNVZe7!Oud}_3C?gMn zJ!TK}p@1&B4=RYKA0{_Uh{FZ-29a|!cJjj|i7xr;pUOLq?+@{;x_B|LQ&`wWdk_Jx zDS5q2WdOu?5X@n?oS72x{f$K2>>3|X8^#epvAbSYlbHB}q<<`FWXQQCQ{>~kjdH2t z0dAMYKI8&mOFh&9KL0Cf1(p?>J%N_o|#$j~fD zbjp#YgD3ADpVVqq+Hw{-|9H#p)>UwRt}yO)umk2)J-~)`?9!au%Yl`CL438`9}VC zN5I&+d1Qv6Kr{L%$!VCB`lcTCX>Mw>LbHg3tw}DlkHk;lz9iCRrN|q<{6A_w4=Y3m zpUUDpHP(pcpGt1HNvn3NY#aTYAK+rum^0SS|BbaS?a5h)3Ga6>B#(iz{Cc!k-E2Qt z`@c)JN~B9M8s~j=N*H%4Jp-{pS?Wm3f)xtxi`d+(?dNpd|6%XV!=e8F{?Umf`%c!W zD9KhKg=Q)t`AA8~(o_->vXe1W3Rwq*PsCIRS;kJt#8{F__T89K_GN~QH?wqJpYQpd zbKm#(`#IM+_ql)9xqjEV?mwnWbItOa_xtsFu8+s#sc0EK#&qF)!N{;Huy?HXaK3Pk z)Tgv`wbcmS@qXjn?Q_uBY_HS9G1)U?Ng@}vWX#zK0O){)V>&Ph_VeE`kM7ud)u%{` z3T^4SQctyz{uxi(3Y)VyUq7K9eeJy?sk@Fe_=$XNiBCUlHv%Gsvlgl9OBi2IIPId( za6d4;J6D=iM7ZV&pfV~v3iULa8J!KkaknPpFsjv>zOL)5{3>RjseKweNy}gzPP?S& z(uR@(G&yVqBd_%C(2={N-7zd7!TRT#aj8MIb#Y&9jkX@bf|Y{(QFX!CimS`rEQuGy z8$EfQeClk*o39YJ3+IX*>;(IKgq1C>$tXV=pd$qj$|BwPBHfz zPoKZ%2H6$6%N1u7_Ql7bbxo50@Z=qpY@6A72|vnkd0li>Y9stYf=M{)N;7imEhw5c z@_?-ds2CCVLcUoIV_OChyN9X^bx}gjcSjX-k8fA^%LJR>>+KGLrFqnR{tat-F&yPL zB73muFx10TgC3YRW08176jRW@e6=am}HOn7C#3 z%#2~30u07UA$lMmVug`+yCbfvK|APo`zJ!MZ8g>-&WESYKiaa3Pr6Sp6UN;xZup^A!NJP*t5e^hB4MY*J`Nv3I2#IwAh~)$N>}o* z`wh-0pz)*xiYT$vX|>Dpv`~Z|%fCM$6lFs5s&3RT|5))xe#9+t#b*i9>^0e^rKP{5 zzIKE;Kqo9o`B1wfff@io4vl~dmS6mnlFo{ zDa*&W7$=2}8n|R&xrkHQM5!i4%!CIskmUfC=qCnt;dddUv`bujr^{p9!ct$FyYG8; zPx@1V)D73aLwd0R|akDl;J3DIN~#&`3Hop*g+Nk|t|m%GXJ@=lE9wZ%2Quizh~ zDd?Qdp6u>`D`7vkz;|Gm(jd}r*ac0`Z6(5NbO(N|4A35!2bzumVqBa9C%UKx--5&P zvi1PpjcvexAm99Zl>GmjzeCw%*7G9d_8O>~ot2uCn%%^%Sr`9?Eq=CQ@4j;S0bu!@ z7x_T7WwlY5~pdLL(Z7zOwAy1~Fk_*qg5 z23hmLIqdVX1$`x%J|EXu`K^@G`k@!B@1@?bJ#zjR8=|9F67ql)J?#S$#FiGie)Dgb zpwr#ARt9#ClCf$(#^C|9jvCocSJ`7Zw|&1Sqdd)C&n3+jQR<4PoKNm!9;p^9cye|Z zS^LiMrLp#L7X@44XTePw0f!k#VRRT%$rE0MZBe)Ik;jZUWD}>zVqabLN}1t@PiXxR zZ)beY3p(m@@XOwP6{p*RQPrk-Z$n%-0usz)ca`?OPy9AGOS-P!N682h%T2#nR&kE+ z<7D&2fz^0^rbsiHW{Drx2mT(X$vGHeNU?q7Qamy)(B+Xv_H!IRwBvS^^ua?fy#)F9 zCy|eA#po>q;{P4&Lu}P%w3m?@o!L^Obf~N{$a0tgxPTeP`7@InaL`TOqm 
z!U;0Tt}gdLVf%Jk3ob|nZ|zxKVy+2r868W=Rl+)<6$$N45(5rgz44ffynk46_KDeV zMx0dUo#Ic;eGvwBnP@EtNvoVnf8@p4!`eq<G;KplLIJ-LpEx}gxq&+`i+R+nupQNBrHA2Y5x{^nSTFK?zEKvf5+-Ajh&2e&} zVxl(s*OqptqrCKo0k@h&>a@?-AC81CHZFU2B&lz1Qf9%uk0kOT6vxoY&`k3)Qej2eNCER|2*fU;9R39af4`A2J)Lox))83@_HJrPPCsG8{bz(av|ky4 zP9udG9NOaJ%{Cl*%5LEB1J`nz9Epb{-eKwW)gigX=IhZc8QU2lB*PvGD@GlO?d_oE zN0w?G^cLUqZ24sHDJe6B?8E6S8MUQA!iZf9Qlj{_Tt_5JfG%-nsPTM_w!(sL!C=U! zF{l;_bc=D{mv=;@Zr^;vGN7xr!yomr4A~V`+QOdxcd6&APaSJhsqf8ug19B`ZRdM4 z^4|2%8~nfWSxW3Qq#N!bN_Z6^fmWuaCW2D!nYf8z9uI2W9h3MMXzI_kSjiRDy*8hP z$X`70*<_k60f*(n>;?N@AMHU|-R-&U^OVq}GEbY&maZtzHjLR27#ndB)i|oA*{tk4 z#%zKJDPZHg#qhFU>1L9LXDRrWQ`UoBR3n4TdE1PvHT?d`O{RKh+RyZ3@CmIgN!M>b zxo1|l$+W*XHB1q-6ck02;z+Z!zX$k;wyw~w8++A_R{c5dHPCgS3G7LYtA^Vz*|OBt zC&1>h!l_mw5UIUhDy{Hci1p7^-LRrRZO!j_s~F3h3y3|x^`zA7?g7isn`;7NE^9g* zzJYuaQ<&3}eFE%-K-T#WVN7<=52(9^bFj(8qFcNpSbCAJvm-{UDwBEHx>;5IJzR#voBd>L&=^kRu{5G#rZVIkPOGG}=hK?O(Qwer>v)peXPP+rM{-p4aW16FX&0l=TSsJ+o(~v$ zjW+wQ<+Kp_u&zEpl#zF771Xq%=?Vy$K_W$|XG5EBLF}&D+q-EQ30b+8v%{ZSMUEMj zo)OG{c!mlxQnzl<7^wyY-O<%nEDaC<-%Ysf8+B@e|+dC6R(HN{9CnIH8U$o1jLIL+0*ch48c ztEYK(pD4g^VLl@(@k<|E!*0?9TBMa^oy}X*{T$VG`^s!CH(e7rPukrm-EQjpvS(@H zyAwo1Yl00#nsTFVl5nhp-dJcO97l6!xk90|53}sj#%r(cW{d~>N%#L+#4Z)_)P%oL zQP+8S!fV9v{U@%bhXh@yn&v}s>%i?Yr!7uvJ$9oy^Lw~tSO`F^Wk$(2XHtw8t%j25DIa&W=td`RA0>-t5gGDB3)VWd@{Nv^?zVaqTe} zqfsRa7^6F8$KA$x`JPQ7OX!tXsAB+7%ZstoG?##2MYIK zut9YR)G2sYAQQUxFdGlPsj4G#Lu7|%D0N@S+#~N_^G_8*Q?$C=*GI^-b^pUo{aZ}M zJ)zfzIeF@_^i|d)2~I;BYq7diy@R;6(`tKFW}U#J814CYg2bqASeA@6gG|9L%Q9s+ z{RrK!_(v=u`l*s?_SNWZ#LN1xb+xq_6wcE9t;fPkr?0DTA1T$L%8&>!v;*yW4sDv6 z7UM!iiY(AF!zT2tYb}Z_p5km$)VP(k%Ox}F%a0`t;JEq#bWzjcv!;dlKyhiwTj!py zLalzLiwniYXW$p!v){k|4eQ>hWwWj5gs)y^93il=BQzH5Dp1Jg+S+3^tB3nq>)E*i zOG%`HTzBM%3GBgDZ72%I*-iKjI~TPM+Aw5s>{@RbL&+_ZoylJP$Ue>)|Na{`yFJF? 
z9G;Tyi-|K_c@zkBZH419z*$r9=FS^g@*!IltL7qAKNu?#rtH_Q=B22~3=R_AZn zQ+Hs{V}a`3c^@RLgbpZXnuq@WS%%Wh|6bmIU%h|Yw*Sxfo>O&0Rg+y9pUX?Lgqh>Q zGN*JBUx2=67_db>AOey44Vz1v#8MooRvZVrujHy#mZ+Uq^luYC}uHLoN0Ty0%cUi~{m=~k$B#(DNw?un~z-cpj4 zO3h~0;V{B8;!WZx`6IE`Y^j?<7}cgMS&zOgt#BaEuji-x<}ajq-)yK#D80$SuNEaHVULZKYZuW+ebY=%F1AZ z?N{TOG#r{m{Qu%8_o;xamYR^4#u)(G(mH!#A{}{{JJ?oTEX@SF zt)h>IY#~t8NP8a55dy%P2z9Y4*1m@3X^trwq)=_4>+^mG;qaSMVkGBfL zQPDa5=D#v)=%3z8KsEr{S*jb*skg??o&CV|AJKL0BH-K9c5rUs|5bj=ZiD&vVqGxo ze`a`6GFqPAH!U^GQjntJM|u}qgDXO$ho;DkaRXWGFWr7@Rq=l2X(w#)+K<3*501V~ z!i{C9ue(ueSyChhFQiu#lG8GIl>U6R6f(AV=3<(a_xh~rN%uQwpD^KRJifGE+N*CV z>Mok3IQW6YFk;ss@2rkBP=3y~;zx*1UeM`8UO@Bj4kXJ#Gv4pcgTQ*y&g|> zHT6g-G~48){d59e_Qcdwr9jTY1NHS<#&xg*^D*uCYGP|BMv$dDxFMw9u6EBy`8&%v z@49uXL;c4c;p?crw)V`T%wXL?krsz_hxi) z-Ci%NzqiXZOt*#>{?b^0{nkSz%EU25zdQ2V*E-#6OEL(`8=@8(*eRYg@ZAqW#wfut z81*yL=2cx-K9t$p1&2F1Z?8Knss|p~b?8l>1HAfMFBTT1`c33@$9qf%k#-3+;>K8b zjItV;bDWMwJ%$8?P;eXtcO<-NKTDbp+%)>iTlZ|eA#c{5QObv`fCTG)*;TJ)zY1g=2X^SHCG+<%oS|EkLek#`I+Ue(is(nUz^ed)jqK8di1wAgWbN=z-e4E94% zL4j4*Ew%N&6x;lOIF-SXs4uIfr$0qo<{qrlcV8Qa46l#8Gesu$xjvS)+hepN=?vGC zKb_$fy0$Wrjpn%_T_l6xI2K~{Kva>Z(9cS9`IZ^k?*4Q~27cROkA5B7R@ zE((*`gW}`#k!jY*5gCRl-2_MKjzjEyk8gYMoNIwh+CG>SSbUdw=OlZ5bfirqsco%B z<@x@UTrMlAC1NFN`5KLz(EtH6AP~$z(-;hF(z) z@Ox9XQ{hIe2JiP@c0W~F2oi@6v=s&RuTn;#Ny_WK)l7@`Lqq3@KEiDw303W*1+~mQ zuy|QCSw%^IHg>ZUNG_wwe;~`NkhQ;K5jDG_;n*u_ZuH##8ovvdeGS zmG)qTLthmLWu>^r2PaP+ zJgITQzEdexzXLbwKV`MlN%LErUhS|FJ40FJzt$m9{o~Whw>uvb_DAV?t1mFj3@xw$$I8;=~lA&wm7cV{lI`V`1L$_!%G2z8+ImM)->z;7sTJGZxIj&A>)sVg z&&uku7TQqrmpPhxwbotQs&aKiqT)-(%D#L2Yy);C769qG7-dk{JC-%1S3F_y+TCA1 zY_Y`gj}JXNwF(qc>f@!qTvFo=;o&7n`ZYLdf8VZ;-73J0;45Th;gV<&5I@ee;+(Vq zIIeW&er(TW8lEGpiG%>g=fY)_9srEFaz5dI+5F>`ap09^tmCMq7(hHRYr#S%Kt*Tz z`Ho+CQtPOHeIEPI?>(*5Gfmi%*bv#KdR`%s*I?zpVT2IkFENy6$QYx8opO+t#k!0J zIuu8rJls|8foID)`Lv1vwNDS|5l#IX6h?t3Dk;*^%1hj@%rhPgq?OzrGVuD^y6eQs zExTtPDZBRdA8EI_gFlT}CCms|514fzp%WzP+xI~YB~81~8sAw*y$uhn5Y_s#KD>C9 z>CthiX#P5gS$4hyHst-n_G)_^;AqV*|AtY$q*teXqN2zmP`<_cC}-sk1N)tA{`csL zF=6G{aG&OT;yLR3Rb#Nt$QAiEIL$_6Y?>-WAsb~I(iri3HpDy(&}9Wm!+SAY^)#^$ zA%GAiSmdz_(*>AIzoP$5?%fIAii|}GA;jjev}b5*`mQX>S3^US_B?;Z;&a=(k_wQK zt}In(j21TC5!D@!;fHc8=+~cDgqp5is83uGWg2^F%I;Mx7Ct4UUh;5*U%eH5jcEZW zCET>HIF6ez^5viPQd!?@9xXdh}5 zn&cN44_0s?d=ZS6+jS9^`MjR558pRERL@O#)8s)PTE5KGchABQx|;!*l^ITRaGScE-?DipjP#=H*Mi;*y7N- zTasDWvCQNkQ@lJMLaSM8E@! 
zau@Kl;x0jdA_VQxXJ|{4n%XKZW|pShVNK#!hdzrm9JYx#nq=}|vV2;bFC^H$uMtUD z2&%CgCE^+Kp+D_^)G;+M_a+_6c^V5i!5tIVFY?Ho4ydw=W*M>HV%>b=vbFmWaF!rg^EnnQ@7Om*1{*N@*&AY;*02?)1B?tK7G90 zxomEJbQtUoiuSMwV}gcS&Y_94WFyk8dv4GZ3Av?t@7xW%zNcA8)G4baWaV8dwh^EB zL~y5*mpzzTEG>F5Qi!9>GN!FWQu#()a2K2UUq$?=bm8uD6UsK*^V-~j;xgx;_H;O_Y4*&{;C(WY z@;KOi^@+RRBVrG|{l`ozR2-b?$8;&}W*6vM@9*abpfAwyaV0Yf$KFkrJbM@} zGMb8GK4cI^AFzN0j~Iq9+Ts9)iqIdqfSe|iQYHzNBL)}qX-8u_#N>NZ*0h9s&3@tY z$)gKtqIZKdR3fN)X~z}$PkchA`c3{Dxb!{|OncI*W$L35gKtlU5rsT-j&7z59_Lg~ z{5w|YTEVZ2ga0Q3sC|%SMp+dcWjC}2Gq%x+MqMv#53q(GT@N5viF~y2!cB+6#KxEx z$iHE6uSBlDX?)C)6PbduMU?&um{Mzj>a7_|7yc2stz?7*-b~$xqxJ9_Y_WEN1o`Hls(9$y_zn9`2dzDptRt2lfF+$_ zaRY+UG_YIHu^M1&=D3IE2%wP=^%&y)4KwZlE`#Rk1*}^S@SD6e5l%D?z{{pSkNsnj zsQ+H%|C(2la}GOIjfWBMBB6sQ5F%rGP-S{w98^gnqYo@BMW{QEnkk@Y@r%cvSI61( zJvWyg@k#MWv3&LHIA7<{C!A8O3-Bdj6p<`GV*8ZBfGM ze<~qlaDNBv`P2V-C5FG9f|xZ4U^1yM8*c)YK~RLBAG0L%z@dMc)cwi>Rc_Ye3};aL zu|7=44}U;IHne=!zTPoia5~+DkjtV@EB1}A_oj!}3r%`YyqA9D7mq?kW))FeZ?5<%R+U{`74J;=9RDfrxq^7kcRM`hb9DQpARCMi z%VCgrinbBXW|-#We0*6rFw^rkLtmh}eYWl%KDiQK|G;i(^CNP2jDlL4rDsy}JCIxx zcdsnMWj55E?Hm%g=l%`^P{7-)DKNyFo->Pj({r|r~@#t3g%O~6?Cb| z1g|4D6+WVE_d0hOql@X7sne3)*A!1v%2B67n4H~_%oCm%J-T*GlTgSeRlAw3-CkjV zAW~MjLLOC>$ZdNN2hV$<3&oE=gIdFE3J*)}I=PLV2B0Zph6S_o2TC8bw?POF7a=DYS0r{X3vVJS+#UWZ(?bFeVsH)ds0hUr88~E~A`+jl0-~#!^ zmO74HAq6$uL9u!aV4*}w&Un4^VD97*deCD}%Gg-wf%y4F5BE=>T;k8Eh~GJya$A|5 zY<7&r)$x}@lpt{|Jvsb*0F0?hg##K$xH?FeLLlsv1q#|{_e6KmOYZXtkSFqc>fH4$ z-Yvamo?CC`5rqu7cG}v}2xh>~WKM0eB$!k9X7~=)btsr3KXKg=p5QQ8Ub2FYGO}bC z22?oic9OS5c7^8Ed2~f;yg5?%a6F~x1uz>2*?5i=rUgzj>B0_!k1+^bsikogjnC8A zGv!4^lbx^TE6F9T8I7P&c;3GHSwYA(sk4?p(|FGCf)f3~#VPy}ooOf{hiS#v*0yTP zqfg(CK<*5=B_e4bQa^iz_@d^_y#TUW*7O}7`EQpc?MWi(Ue^7N@L@|Vf=oM0hJ_ej z1X-CEz-U{=1Ws5@r^nx0(e{1eFgeP(qHr2tAA2ov;A3a!4DmifXp(iIzeyCby9|Bx z^m3Wz`xPdu*X5~GKdNZ{+So5yqBFsDT=X|AfS*nt@nImqzzSu!5Rx;mH`~pyY+yI)5g|3RWh_->WbK#f=3soY<*YZ>!Nt z>HztPW*|APNvW?ir>{{+v){I+VxF<;5vsrKbm;TC$-cC+0y2Ih9a2ngCZHSXAa-;5 ze#4{#z&He@ZWP^$vMpEqJ&!#xwnxg_UtH38$=`I-eyf{t zga&W^0MHmarr>;#zExj$q&7ml=Xzpkdnl z%Hj5_+s3;hjs60D+=t!`OyDZsrG!UfsuuAyW04WFkJtI>$6w@!5UG-6;haRKO$`*R z8sja|QnOwc8dOqz_N3f_Q$VmqEQ`6`uFVpAHiakojJk1l>(Mr+tlABE1^^ytRb)Xf zD979UEbVLHi38W}U+$+Q4++L|Ywea?sgxv!OqVqXUeo7?pX#?n*nM$5uPDFQ+936+S9zT z7%lWAD6J@RMQ!m}kKPX0W4^z0DV<(`kiQqef{U%2jLEJZr* zDa$H&3K@%)VC|rgJG}5~9zmx+U77v-of6{gK^x}?|EA1J=%>FSybq{i>sjQ=88|F{J{qJh$_Rb zp8dRWUqQ&lRjOS=(}9}T23Cx2`MdbhT4|2M?v}G0Cs)yT5g%u@(-tmXsjiYU`$nYw z#6l`Kwmgos$`W$`upUV)DHY^VDac>mND(5<#Pc_-)oy-5N*Q?+Ndag!nmK%1A0ADH z_E&?f40Q6Nfj4Sa!Oo3i;Ml3yA<#1*^wl5YUYFn~0Q|wuWFW5U3x$KLu8!jD$3cEX zmbe)wp@Xu?*VTnkXfrS>4C~<(R@8Js$^wDzPs}5?W2w@g)a`EZ%PL0ZBPGw zsnhBeysLvHZpGQ1H&x^_%8!B$`z&9k6_Ln7XuAhjS-dpzxr*4qjuu;W&byhVbMLhD z?5?DIIV$YG$#{Ag+idjO2c(+K766sJb3CDvL>D+Wa6G%|dZCNPQ_K@h?A5ERrul6r z{7&o?)hDq@lIU}oJPj>)z# z2_T^H53vrBFUT>M4$uxB%%T^yAh_tLmY+}?J;)0|T|E8$RaFF)I%ajCcD(aIr|8(n z>-)2BZpCrE6Vam+mpr|u0YbeQ)fhH4*3Eqltx-u=C4Dt?Y@p4gVih;4YWadS_=5LE zouDKJog40y{<-7Kq=iEG0MioFhLvm#WS*PDSCf}&r|XQkrGP2o4~G*yR-dAzSULdAWN{wK zFPg7Gj^6ctRa5KttSpv~FH-Tx$xtcoGJd|6hdRGuE#l~M(X&iE^N*U*gkpCpk`)FV@ zXG+V@^i-i~ra8L9|A*DE07H_UZpD%aYr*WJ2S@6XB^MEfv|_}8!N73q5pQj+)j&_z zdbjo4O1GYNUXA0q@Z`#7g=Zz$?_z-AtJx%v{Q%0}^XQEP8jyCcW?HfK(Aqkd$WuO} zyjQBBqiV@sOVedBM&|jW7h0>cvU5_(%&Hz%?2fq}w4Xbc>B5jHr*e`V9BTb5q zOg}{~$3Cgq^q(?`GgR~bW9w0um={l+$iH~p)8ZO7=>=V8d6TKf3Tk8*v-kj;?j+6h z1wGn?X8YJbXCg%XQrd}9<7CNP{K2MH$C;!={jk+u^} z>YOiN%psB@Wqcz|A0a>Zt93xvTo%4Rz{dS;taGv7Rn9F*iOQb}sE;^El@N)RpoqjW zi15I-&|?D2_fLyy0n)vi<7@57jlO5R_@$3_z>--NPD$sDZbkD3cd;J`&;CTkAYk3o 
zR>KI?NIoTf312ccKH=`$luY#nT)p{$w4(=_@(z5yct=wOO1X38eNUE|!wS|Lz9Zxr z6_`7bVPy(=`HYut#y3J}!L^Wg<6Y$|TG+}Z>kXBVnuK&gg$qTIxBWf{0npw$;{f|T zhd;!jYs#v{3P(4gO`9iZRJHe49V%@+b2ER}Va+%7+CR!8#KrkkVrrHzm+gN-$U(Ym zj{x1YX?*+YBkUeGy37c9YNjx(@Q;+!0t=SGf=A)+tM{Lb^KDt4g~1MoXAMp?@N{<* z#;iaH59%MYj6pWe$eD&D$)O^>XX&XOQ9tFJi-QUeW(IO`OB7#dh`6y8%52C~^q7JmR78_fk4B?j-ooYF!L&Z0UJ$7H{Yjy*@)n_bIDObAfWL5C znRyA*SqbQl(xCNi8Ry`vfmWrTrpuqcxsDj++PU^<=e_wM-udf}V9Zo2T)fefd3u;L zh_Ar4!XaC{D6*pO%)rmI_EW}I169L6)|DNf%FCuHx0rX1N`(yPUU}RUXF>R6hd4vS zHfuj2@*t-g90Ca~RwCCz{Dgccwk6mwp(kf|=e_qb?QyV-ixfO9`%#wAzdE}9u}J`m zX6l9Xou$t_^C-S%mNPk?E0a{*Q~bSUMxX!V!%v}C`hy71FuSnL2v9IT#V!LK2run8 zvZPjH7&QicG>o!*#;rW|^MuLQR*7>pzQ$|DJGl-zDR%GkS{bWEPC7ee44fC~uOV`GL}F01i`}~*9Cau%fGEER7ys3% zhZkF@&wJM$8tPkUXqa<>nX7bmIg-EZH_Ud=%^1QP;!V`>(Ddd~7-bO~p`;8w&Vxvl zJ2O0Ija1Rh+q-9ulH5d_O}bXwvP6zRe9;st}6nmYA?Idl+Ej*lS~E9cNryq z2o>z>f2#BaHmA98!QfD1lDTe>jJ3-TjoY`!Wn4}w&mO$+v@H)FoL4VAgJX&5w<{eU z#NICPq^v#-bS%MigN!u3x?5mgE^qve+xxzNt+Qv=g@p&^ap^m~rls&qQ5zMBd*dNB^$uRMRE2&yFctR%{6NgBnY!^+ZhJDdQn2tIY{A9OCqe)lkuJX0!b<6?7WUv@7pLe(kU?x z1$49eezhh`L+ieu*Is4WNPXi^Rjx1J^X|vp3}?Fypl@f_50M!Ml^M@~#f?V?u*>t2 z?mcMusr(iqf9Kk<&V1jrsWZyW!$!T=hV~Az{^XPZM?-!}{B`^4wghtz z=bHg`DVk{nc}_*fthzfLhz9ljAn_|hGLq*>y|rWv4Aq+9h_&B0w7%?_I#%ZlN|?N$;?|i0 zxo|$iqXB4AK7&2fkDTUUx97;;hz9_ZlsGIf-LdS;l$}Ra65FvcjG-p&J|hz^0EKBW zlYX9LvbSn|=jBZM&#yi{D_E?}yBA`|2n4@7XP%?oS54T1=^4EaM0$cHhiZ&l((Z@9 zfqm@U6L9H@-;Z62YDIFi@6*mW3X&ahgEX6toi&##gr9!(bqtG$k1 z*57(T;^ljJ<5op3`*D*fsQ6k#c}xwKSVH4T7rMb%E8IQX&NGK+hUiUeeC#{!g1dh> zvW{D}kYR-G5^GYg^k9bbAU?s$3FrxqBba7%l%LSB7YHAS$-d0PFYaGGB0yB zdhBg5-aMGscGOvx%;QUSgCL8_l3|4Prho0{XhY1FY>~@yKvmQESaD8;gcai$m0= zUPA8ktvj0!WZ#^RpdNb{oMw&mVTJvM?L7zJ(R8cImUN8d3U;!@NbZP|K za*EzXQ7ROPY}z~b?72Cys<=gs7^^ZC=SWI_8p)US`TeCmtaqm*7!k4RKgeTPMh9Sv z@8o=$Jjl*(()ojA@qMq7$;%VWi?74fMtnTHiESEdqOq`PFz+=37RUI)t^;P!XIO@! 
zK$OLLbc-9m_Q;Euw*44dTc7alSPdg>k=dE&vr;f!_;H4~i*<%+jBR!$PQBI8#)#}8 z-WGYh>C_xn=AU(0&@}6t&;wB&yFRx~7tZt~O>M*-036PA^<$AGl#zU{_l1 z!`VZHMQt-77A+}U+jfrv3K8xl%T%A|lc~)aw~oo5a-4xzUR}EB_tX0>(YMQ*cpe{b zfMSW}Fpkl8CKPk}9Eb@`dW+BWUm$X(t;Da7zfjm6A4*Y;eKSIS+#o+`r<6X5Wf z3EonH_ECBzU1kzGSzSX5c~bKlrw+V{WhP=mgOTb$?)?+UqAsAe_bVyS%s|7IO<+7B#rK$|TlB8(-cgZy%F<817y1msWDegCG}pIFwv ztOEJ3S953~QZh^q`?+aqHoO~TtpB_W&|LhNi$WOSKg7@ly^iqaZ%lkDxIzE4kh#CP zD5!Umm;d_~{omfA=|D2HcaMX%&S*S)^42|x z^7FAr2fqB-hIzqh29uF+kKxti#JB7W%n^4x@~$QWk7V8^nrh7MYcrP+9s!H-8o_1v z?|2o2=e5!5RJP8hK}}?V*}9pAgckhH zl+mZk>W_1>SJR`#($8M__3O^}(!mrY+MUoU0;$ZhB#o(rb}(2tpRp4#Djr*~Uo}a# zi8W?Rtu3)G=MY6brHx@X+jq|8?4Ettn2W95pw!hh=#+>4#n%@00cSPo+Y1g0zbzDy z`gH-oon`xwWDu~NDNV_o1F!;X*qeVs5C2#G|4(hh|5?c6-v26(oGXtVnONv@{6!&2 zv8}pA*ShU+w3*o+S63i5wo?g61cO>A!z$~Ii=J{^sHw~NW=U?@Irr%4Ty9^!)-^S)h`S5zJW^YI zz>1f#OY{J|g>mA$5sJP`wE}uX2|YloO$gW;>dGEM^@=3>lQTbA(RC^E56hUTin zZ>!`<9Vnlv_5y*;yT4dl|AvJ7m%q`x%n;6SwkA7$8O=>&Qh1}68?ERP)VGZpywMuf zd=8V%x8()sJP)H zb?y3uCt@${OFLG?L)=j}+eu^nk(!%H{L5*1j`MJvAqBPUQo=gJevdhf`BYo4_u)f2 zX+qnqzoh!m4;Rf2#>>kM%I&5-Wwkt37yJG94Ksscp=S(3cCj9D)Tb3M-FT|ReQ?{3 zrH+y%hdA(Q{%~VD*%E{71QDjn1v_zn1Q2TG-)4LV@WXp=~3uhmMCVz2xVP+i<&7Ehv(evZcI<(d1(OMkKBUCyo zXD-N}Om_Zb%q3!UL@>orsrj-w@&Xji*)0HsjJm!E{i1rA< zynaCV(M~yd8&P<^L)??IM^IWmiN9iNJ8by|SiNHdpx^{!My~p@MwR(1s?fMdNHPH_qhUW+mwlMWJn)sN z#O2_^kXf@@$i?5y3k#3QeQ(o;`5H7YdC~R6RUzef{t9$m6RX{aR9N^yD%;|mXrXm>?Tv|1Z&>K_e!)(DI7CvStW zDnhe8}`eLa>=<75=46461jz1Vy6EU#EXxSpfh2PN5n=2*jDkkb`o;$ws zcA6pXiaCITy8XLDfVqPiMAi&-pq{|1wI6{`B#%r}s&R=d<7rLt-!KPT;$T#=!sYqp z=t~Z=lB`!R(`4s({>Xh(+Mc}aW`eN^QRncB(Yrtrti*yd5nwhUPd?-kIK4GfLE}{C zdtUM!b)ERJxAqyRz>ZG5;Qh3xT(#hG--e6FaIeeHGw*S(-0y!$nwl?3i^p2$J&5(#tw&70OS+H_;qocRL*gb#0Q9{MhXLD-G!=F<&61D^l#FkIz*SO{W^tN|=<73<(S9bJYNmerD+tYap_O?bWLZ9c+-Up95;1^(r^6%Za ze7^aaA@63*Mn@^HQ$X%jw;Kh=tWF==aVS66`nCQj1iK_=p>VoBB49i~oOnIq4B zwk&s`id{`kWZJbLs0>P>yBQz^}1f zX63cX)=J~O4EK@qgzj#|5E&9t5-n1KK1xTQ4OebWlQFoubep?=a_xB38PLrrCnOW6 zMuB3-K{|+#^GPTHBTqHFJ!qRPHu2OaCt9&Cc#pR>wVZHG)Kg3~2?@nh{T)eXL4(=MJG@P~$n%>OJb;%Z43fcS6CPL)w$vq50 z0oKE6q>ln$_M$1b!7GD1yfg#xd{9%{lAeIA;l*~6di#X6eU|O{mwt)wNJ;8jySoG@%$lSpR#+rZ&ju# zHr-pRzbB-dee;@tm+ekDz)a)tg9P&s1?mR%khY<92i7l8;Jd~d)CU25a0Jo~A4RTQ zjR8#eLP)}-S2y8~X(UVcyVLMkGtbhH`_?(py0r7L(ZKwn2fkX5Gs1}_agHMFJmAIf zvKvI`fKQ9hn8j4vXOHYdbkp|~h@eSPGHL9!QaACv8@$!FFL}sgZ@Iwtq3=Msz40b} zh{HDm==^8c6&M4wDG|ydSmn2BuudA7xV*l0*UlfaVtCG4MNIj`AC;@_&k9bAt=BGx zF?rDzkUxmBDxu5F18BM4(njYZNWT>!m+{NqRQ6BDKV0ko6x9AyZ2nCr-Zz7Nk_}KM zu8Sb{lVKp}ZcAG^cTvNmrR8q&^Fy~&?Fn8lT%$2NZsetVsDGYSeycX>uvsA_(q{D( zS!I>j#07F?Mrk>_B7mU4y3ivHG#F^d?#iLF<%p5FBK~i$jhV^?9FBL13iN_5a{cf^ zPUT@l=sq1*WAcpSP#(qV>4VA0oAl%B%W@jWJ^AFgJALvk7@4LaYpePhQD+W~qRU{i zdK=g$m=jQM-`NTpZ!3Jb@=`ig#m~$&U=)*{4+@BSDv_k#iHewEd_QXPWtg2M7d_};eEZ!Dsm56T>0P1 z->TJ1=2F{g-)&q+j=a<}3*n}A={R%KL<5(beAGp{v%hVyr0V5xSb#NvP(Com}n*#Gxm4!-I$+slO zn_G7}YuD&~>32mP3n&I(j6b0stYiG3X$_HXjJv-SccFhah*$@S7rr=(rQp{O>68x8 zw(VUB4ms=aaq#$ji_P~FBqBfTI5zn}18})^H>+qFcTh-O6qOy4(W8R{YrKqu-=`{C zQj`}=x$BI;Fno(+u7x$sxqeDbjD`3M`R-E?UR13etaoH8$bkzf(EkJ@7jZM zR63peVt0wNv$wn2V6WV5!ScNi(qH&yDL3+Gim|C+2U<(Z!4pVvNPfNqS>pta%_1ip zOX4pgZn~{JcqNc`$=~&%m61|yKLgbakOe%5pe2GVbji#`r80x=Jg)jyMP&s+(Z?89 zc1-|H_7No^uXzk_XN!EiouA|tiBe1(U<1)!9{y#oMBOJAi$ojj`suzBR1>g)fviFC zw~%wMBUi&=a$k0ohBu{~T#;c9U=HVFZum|qFEtSKG4kBch%I^`h38WNwBie{=2!Ig zkU;D^dqH!hNnK^Eq+5N$&ufiRQ~PJrJYC-N-DVk{JJJ>s#+D)=S2bA4nHrT^&yYCW z#oYqCF6qQG;sC0D3!BlVu5K)&nip405YdyF;8qFsk%JoN+9}sQGB7 z39tPAbln>6_gzi=(l!Ff>_n}OHX@V4XPPO z>|O49f9$n;-MdXu!S~}LaqYNZpb<&RL%m1UNQ(?8j5$P+Jf)_%GDcnHhsZK^RwYIL 
z?wTfZPv@!~uf^o{ExF{M6nm_=Y?#^NW^WJK^LCI3q;w#y8|+EA(ay(hByQDI6B`KF z-^JEKJ_a{1FciNqwzVKx5Rq9gqUVoY4dQs(d*w50&-#(p*iRHBn zSm7QH6#3LX5IV;H2r#_N`<vlu`QOf25(xw8+rZ>7Y@53!h8kIl>yI{Z$xQrgmU6(yBY+je>TVpR$WA+xEp{FiJoy*P15qfvj zO?d#Te>oOKG*jf=TX*v$*_?R>=Wv+yhFpTzEYUt z3UBId<8migm0hu#*Fxg&dq0ue=+&@Rxu3flE@qB1Jfor-seB~DdRVvCvcGbRSO+|Q4*jOP$~)0_tYVMV z%a}h$jL5CQh|L#QA%iM6agv~`46wIp!Y_Q^!0qF$#kKxP7?xETq0HarKIPoJuJ(Tp?_%{QAG%ix{){;t`VC5UMp5Iw=UpGM#4#9Iuak;RSWxWDSaMg*kTj0&EQ-^wmE4>-^WQ1PQLhOi|@dfKz>?qU1l- zZ$E&E+7Bd#-k$zGsMa8F|#6a1re-+uIH8xG`e|>wTo%I z%Z`zbFG}Eha?@)_E*y^Fu0J|1GE`R2<%3UTVY(co>6yVArk-J#EvvhZcJJ=-@SsW2 zg+4S1trI$)94}abyV*8uGX)F{SqCv+40io2aAW!j?;l7V!~-Qf+~7H1soll8gLX6G zIxihemD<9&cDuh_p8w7pdmtl&phV$+h3Q|(%K}|3J`^XmpvCU_m7dN7CprNp7yLH>t&x{hyK9KGJ$YhbQ? zUmSVxDNpaCWm}i50E?hC_>}lQlR5%C8VBJ$tgGxmfO&gynbb}#D5FvPGUW!HGdRPEDEDHhpP~0r6kpZ=k|xHN!wo8i@ib(iWUnf}#rTp~sd z)+KLc-*zR@_~1dNysSj{7t^G3ZB6fvO7}Y5YaU*`=JMGs*k?kxhg=;@8>+aE+UG{1 zh2RRzL-e+g%+zbRJKyzrl$)o`*hs}M+ii)HO_Dy$JER9_Q{OBLcnJFr@rJ~{XCt%u z+7r0i+-ltWr8-Yob!{A-TtB#Lam6G8BRvAAoe9@>wkr1XF(#Bneu}O4jy?9m(j;z+ zyxi7-Shp1tJYzPg^j5^`j_>O|5bi$hmV%(=}InbjN| z;W`kU{7Sc>PO|n)t*Aw)dE1{E4BR|uNAwdC2@R}E} z*%*FXLnjea3+l3a_3?BbnXD{rl&W9HDJ1^>JcJ(Sr215W_{fJTG9NcY8(v#%)*uvFp4is8x8t4n&8_GC*2q5ax9LF@ zh)`zfNakDEIxAokCsj|~ahxc4eYR-QoPZ6uiY=W%yL?5Fyp>w|!@0Ng&agHD57VD5 zeF^MaZmpILvcR(n{o;0je&1XWI{Qc{F5yJw<)Uw*TuFIpM#%cc1AX|1pWWwrSZ&vz z=+to@rxLUX*UVw1K8icb%&nC^(`BDfEvr|wQ@33JU4x(i{q>8tw?4eG`3OkUtmZ@< zQRyC7=^Ky53_`=dlNFmBbhqVdejHRxee<6w6G)x-BQTw~HQs82<|LDF?kf1zJo$82 zxm{*c=EaMHHx|ZE?0;%D(-IRsV+05y0$dqa0)ORZ_{`Xb2rrcp+=Bvp&UHMi9vRWY->sIZz0U1cK)K zUm_@hijU9S;tX6=4LHL| z);p_*7lD;cbq9BHR13HAx461VE&}C`O~u%`4`UY`K3bdvz^`{&>@CVnkS7P-K_8)=g4HtH zv(PxZxC-0UY&oj!0lypSa{^nerOH8$16mz708bnE3mM3qH}xS^9O*@@W?y+< zWL57S_A)TkR*8RivtjF}K2#kiboofil@ylT53biZ@aC%b zwn84@v2`PK)nr`pRU4< zoH8IS9y-T)3(SPo$A}H)y#1hRXP!HN#@4Q+dKPqROg|kTwWLfP|1?)&a)d8X6|qSw zzZ^quf4S}SH!wUwpbz>No4~*PpU4Y>D`CKQY>9e)%$~mnw{fzpIJ7pAiyPN9`EWCe zS#i+(!1jSPq6#!2Z+yB-YW!0Yc(UyK^U4pX1nsdl1)@3_O)jWQ{w&0zq`3;iTu{Y1 zX{uPcJ>Yz4ofJMSjYE4_F2l>l`}ljOr1^C?Hy4}Ly_=L@b7Q6_N_*k>oBE1PP2B!{ z0Nv9DP|N%vf>+?m6c4c_#!OqxCD@J?557tJ3mR`8aG5iab*DT#u=}tr9jYi^X?`f_ zo?`0nvu@jUF>7NU-g`VUJotMM=_SzLR0Hngr?K~==m64m1|3>2#cAp(w;#{u`Uv~m zym;<$&EvSmiA^T4a;Fbn`AkgqS+EUn%UOO-6u6ZQ9zeF@{|XPQe*!!DKeg*uF+%^u zL8AS{6MM9a;6n-5iJ?-+i1L&O|AEwf*AEiE*D!fyKI;>DnoFu$F}fpAwGeb26g6`a zz-Hpk?Wt6zH((`Ju+R`(Q%m3T1cPFxSbs z3nGnqJ}}#Bpa|R30X?wrW6edhuU%<}!V8(tHd#7Bf$#J~uG(>XA-av*+Jjs%XSf}BBKhV*j2-vLWND*uR<|{o_nkJ+$O?B~on^J|MvYYW@zl<5 zXT@ZFBcu%OX*jLQ5W{|xng!UrAP#$7Q~~i3j#p2Z@l+9YWd;aBujC0 zbsyN$g}{yh;%UD`IoL6cm9J+!4DVerC4ZO{XusKQ$6XTqG|Um ziaHW+HM8d>`> zO+R>mM!pmMZ96HGQavV&tR9tq&y$-QiFQTtY~3CILHxNy^Cb73xHyXc!LuVtowJNwl$zdCc@ONL+kY2t|E~M` z+ivV%`Tf_<_R1Z~e=fZ9`rhKt@(bb{cn0xjL(OLbctM>v`y?2-%6!x(F$hKYy=Jck z|3AFd{jYFve+?M=xBZ{Ze?7eV+%DQ+p|1kC@1tsyM|cbWGjspzm}XwOI#)RB2hWr9 zu1Kafy732(xt29+5_KO(cQa&n2L=IzZBx4oVglf47$d=IYx92mgYFh95HT>KB7%)l#z$4Wdu{-G_u0ABC^ zmznrpGYUFF|DILRN4u0Ca(LIAyzP@!eNVT0p`>Bp%i|25h-4Ugb1;UC$Kp0Dx}|IH z8--t-4l*=x$7>@qcmfGV7_dLqjv`6%h{aow5R#tVh5Q-z`vb*+G;Xx{SSVNQE;x_8 z1^-mM*zyqYTSbGX>shF?i48LP8vLpMNM~pW`bq4{A>=~@Y(0%*#~30~&)Q6Svh~T_ zbs+XJQJ~j?9Hv73KXRDR@}CygQ(w7~(3s6@Xx@ki6K07@BG8wx@GcY1He9U;TWPWV zXVxMMT6t^UfLJOHz6|0N2cV+UB)V#4*Z=6kG1mirM!HzE8%crMlZI)83Y;H$uy>@P z=Ot~bDm4P7qnkdpb?-4j57fqC^sGYM6VDggHoFNG#wjCGl07~p{nD%ES6s(eBwT}gF)-OITI z525!HSZQm>1thNj*thC`d5+=kAn4~%@UEh)n@{ihQW5lK*L3aUUG;Y_UkT&^4z{!H ztJsdP3B&v})&_POPm-%U7fX+GO?WqAOgy?}U$RIY4nE|`wG8$GkX}MCVUg;Is?_mh z2j>Du!$fEY0OSDGE_T{lGPe=pho%Qd2?5^3Wqy#T^=WXA+*9rRFV@1U-|%KiDp1^` 
zI?k+lXiD?TnZTdV2E$*{tiQ+4Ket}ANw&d_fW>qYbbRr9kc&IaR!#j3N|2wo{NWbN zB|yL5uaiaoC+hu#slOHe`bY2ke+h~Gzq>JW!Pejg^sI`h5-caSeIMa65|-nPr1Kw5 zpvdaLB{yGT;>W&u$o8E=+@e_wy)w2k=+}->+ITA<|A_mptQ92dHb~3tkdC;1*%e|H zVbgb0!WQ=7?kXu%ke-SsCu>}=&$Co~Kwx9mw$P-sQ{M6Ep#$?Cf2U*8B z*h(32W3xUKhXouZpBO_fbP}Bkv$jJb3G!D-+2Jd8OpuWVd1Kme`Qk}FZ_%4aC+i%v zdd-vY=q^+owUn8Avww8qV&!>@nD{UO>!lm{tcHrjtyiro%s)=Nl3O-!$VR|B`Y71( zmbyLwrir@U0BzHZ;=0+RhfpPDk4k2XdUrn@F}^FJhP|-zq6S4b1)9!U_b-cc#o%3} zS!>zSvj=c(h7Fap)Cz%)OqtY5GXKF9oB~w47E^~aXq>^- z07MIRIkM09D6xPTit>lAH26|`e=s+UKNzB$x`O8K6sU~}aULP39;3Cu)>wpNdsOD9 zMjvMi{`<}U^rJy`cm9cqUIPbp54a)LmP>1#+GoR(9e@@^#pWA9@q+Y_KtIKSEM>RT zeL<)JAJ#lMa5 z?=2zukj|ci$TJ||2##f$cX!nww%i#hd10EQdf8SJ9oL4PG(1>v**0)m`MV?29Zb== z0G(pS>m(NFJyxI86Mn;OMQ!$2Z+9W-mQe}4A$#}1V8`s_jY)NZO?U1;&Xy0=TypYr zZa@Vg1RxiP$AP^UCXZDh>kVYPH>NXnB9%2vD3+DD(-U4g7N9Wf%oq|ZI2d^d#zLt0SrYqrWFXm;&356$ z#X<_ACHX-#^0&*AZZH*7PNI$WBnKdH!gU13|ZJ^7R=e(}mG=-AE@VPx5 zm7`~dsXU!EClmm{NNA9VkM||(&U;rcoxNwQT%T=xmbf>jZ|ffAv_9}?`jiZkqJM2Y zS*WiHFe!-3MhTRzpXqWnVa25o|1bU&W3#}_PUa4k0^400Q;mV5ihRxG5~NHr(9WmG zchd4^_?zArTh9zUp1Zkfr^dAg@q0)$0%&j)w%6kZGs!Ucb3IJ0)Yrj-xjrrlI5D=P zV+-2x@JpfdH5@+9%sw8Uzw6Nn?gj-yGEC`|PDt4O) zP2yCEe&IW+SgrT$vlopkvHT%tQ+!LR&t?y`f=_>xaR05x92>X2xe|X=Sx8K7 zJNA^#m_wN_P>|hWP6a%>g5v5=*Gf6@m2xwk*hd(#2jbWoo%qg8dBvH@Z7sg;*|#^} z+aJ9_Za;UnPnnG4g-yntgT8x)IvBi*INR}k=tPbh<=fV1f4}au6sD3F)%J>_an7Oh zH*v}(D@K5X;DXuPaijdqSj0#C;)t=poV&-ROAIriw_9Z-Z(2VI5}?|NvHPKF(MnaA z%(RBb$nEgXzD9Rr5O~5iMYH3`kim?hZ)T>|zMh5d?;H27coRw*myRl~+Gz!)uPldE z*MF=GO-||sx%t`1e~H@F|7UXd9|fU*bnG8IFvU+SF?)MFAmH1nvUCr5?S$aP06k8^ zq8Q^_g$VoIN5;8aR<7zEex(KhB!nnVtc>Rvs_(sS-?lmumyDmd&0V+sOTam>56H2c zuRoHdjbrQeuPC2y+);oN!M(e%9gv95Uz00twMR<@d{2mpcXq+Oukh2bGz#2P_1~v0 z7eUpkj_`2MCY)N%_ZE&gS=i?Z90g7#G9_ud$w+yjabq$r{;mbTyOBlDcQdtG=H{7F z&gX;Tk5#9ymH33qqt0F1VYt#`f3(g)=8OCGZcXQy z#iuC|<82$$v5EQ7A2o;*tuV2bA z@_mbAWH(eXTHWS7uYu2#Y3x$kPQr5_3HB8KQzmy!Ds1Vxedu7koB!+2gpsFT=3GXg z)%zV@*|1HfZjT#?Vrl>ZbMEEkzfc3_Cpl9^(gQ4E5E%2t6mXvJfM1XieDJY=LlJgv z=0Y>?ZP%8WhIL`q+QdW=RZn-?7Y4KMavmT(@IbtH%^XHymbkeR5pc=Y5MV|g$~D95`MMOB ziic)SCrFgGel@so{Og4yk)L*&NVmirqSw4!4?8lq(49jvz?aIw^x-yT z36RJ*oT%)o@kxekJoA1G`)}5(xwv+od3C=r&%|*=IFW9GmBzFn3lthqj|ifeU1+NXYhXR7 zwiONrmZIV*>1{@@UTkjMc92foH{h=4b^Oy#vsfh_T~*{+10GBZu8#2ErviBfDob%~D@gu3jDiuv7LFWxRuSKtcVr2$dv5{AkFP(%|% z)xQKK=!EO~lZwR8Ixc8@vBe}$IKKRlNDudB)Y zZB4^p&heLkL$37TGtLTx#z-ZrzhH zTF`asYFPUF`|jiOal9?E7b5R>FI+@Y53x3}*Kl$?G%BERql}5rFmFEPbTrneG!SXq zRjvEz;kdz_%_sJMM9*WWfnG+c)^4`W&CmyD%6=xN?iw=J32@`|nX&pG4U{-JVotI8C@l1EfA@N*1ZYu?n>=trTnCr&&hZXfIe2 zNn?&2b$)sF@jl#6szcABrl?}ZdtNee6{Z%+gW1Myks?w&lP}mek=!vdCr4*SzmZ#C zH23)W&Ag9^S&IJrHP`DEsFMWU3E0F)pafoKiNfvY7_zxzxvI9iKXm8$@c2BEFT_#B zDGAD(?_Iv3grTkCu@VlPHQE+4SbZBu$AzZQu^|P-^>)L<4vnub)Lt)^@R0xb%+rKs`eV?t%-Sc9Ah_rF~L6h@= zd{$ZX6c7h!KO|{0#SvY_UWTd<5^j71qQ=O9y!Hm!-9T2j!>$c8e5!~sopWC86{^Ku4^78yP+#V zE!)*X$Ry`NeHF_+_Uk9)MC$E%F1?rtVon3C0A)^ErI-sr+F-rgIqr6P1A*t``--AN z=Nx9LNNFj{4hL5~rW@kZtH-Tv-Ac8Ux+X9tZvnTy|LT9Avi_G!+i%}17CH`Xfp1qE zND~;?NVSwMjU=T})3TP5p&J&5*FW`j9I!sTyI7eXI$o4Q7J*eDsw((?&Xfs%FHe>} zT8bk*PK%_xOutajv9V>0YT(3TJwjZM*}y|iy?;<_w&BU4=Qo4n-W!0uKIi~YkawnL zvQDBGeyU~Ps&SJ+phDOPdYoCQJ>G51?T`o#b2eI*d-LS04ILvE4Q?wAdAF-aH_{tx zY3p1uymYfj9fzpY!K0TiJA0JvC@v~Kyv;H-I{Z<_nR28W$-= zq=JXan$$0H42gSeBlYW`cX<%e^w6)gf*Z^}xTM|WcJrXtG0DL&j<%)Ex{n4fVd*{3 zVI!6_ca-?Sm)bh!R)HeD&TGnbphHM(eiHJqlLPQ9(RijEc6k9H%)~{)C_OemOsa=DY`0CQ`RKMHjpu7-j z3rbluirl(FphWzG$Bgq!X%WEwC zkV2^27!oyw2Qn|Kp=vaN^m1^ta+DXY9~ZVG0X7w>X4nN|&^cG&_6(7-VM3O#n#+z@ z#8Rr$O0-uIR%E7I|HJfw?n8?@Q;0v=SCXID^I>gL%YS?2zyEHjQ+w}L 
zi2cDO{q~|I4mrW@1+J~wAT6XcHb^+Yqf@UDOZ#%fs}{Xm0x-KuK_Bgu_Xj8aAOeV`bt6#H1O(#LCd&W zSi;T1Jp8z+S6xw}kSedd>e?pNN82{*td07?15k_uSf(Q^$C3%y*a1011H{9@wUwfD z%4%slTa&$a%ij}5W6w)}i#1+;9&P6Dk&rQ+DEr(!)#YjD`GNCwkL`x4g0xe^_la%V zWVrq<5BSHs8s8Uoq2R4)qRGWesX0O6iaFsw^Y=r)#?#nnfm`v(8Nk&r=A5;qCq0uv zDs&uRAOBl2-QWD4axRT~(i+F{12E5&^^5C&jmX`TtPjqU&c7T4RnHLTpAdP#I4FG~ zEYfmp=J`;4G`1{25bZIei*RCdl>y>&r5@UTcGbUOkG}`t^0t9>Ap2r7R|NT~**1OUg|Y?${U{cex2Y4H1nDEo0M`3FDL>c`)aq6bH}I2` zzh4RGVFP62qq_uB1h6jf{AspbIY)pw2!@C`dlSTsV$~9sMgYPEfgb>^ZU&CKV;U@H zH6VLVK>Ko8r2vnz2-WE$=|(?zR+2;fI3(3Fg?j^sR}pb zG3lV(WzJ}SK%u2P-q7kDJ)cZDX@t9Lu9{bhIm+xnW(_JYzwAVk-XQ5V(16B$)?G~7 zif0{BsJd=+53GM@!F-#G0c%UPQ@K)~@bEn4^|}L8J@NH6`m;~HGlb=HT+&}Pj(S@f8UCx}Q62xzqyFy@ zLyjDYYONAuH)FuVgKmW#gt+S^7`pbHScvp^Fe)#EL|uQUE5M2IYD|x1cV5TTH92#n zq*Sn%{H*Kk#*-(`0c2bzs=%b)bEu2?yadc#B)91eZ=YtuSu+wkJbaJ7zT(O9EVNfMeXkRpd0VWoG>^PDZ`11g zFTau;5g9x#mX=5Lv^It1j@XMF$&_2na^YDMZ6dZzU|j)TUIb^<;E-(roSj)XS&NO3 zQr3=^;cS;XNQ`ssoR! zPDX?w=K(~*#|#X(t&lkp)i}|;7Kc;N86EfDjR>vzK-ivlX2MwEUWq-ivj?Ni?IL_Y zRO5&ENv}W9`N?guiG2H zaqP{4OiK4ePr&Ul|1DVo`V6Obc59`0gi_gy3hFbfxD7dP>o_&7B*%T2mQifYoiEv~ zX~<-iU%|?M8wQUB3WIq0pMd}KhlcO}xB$H?a75iJ8T!Mp+vA?%s3F(8?y-Ie5;nJT zHXaKuD|`G_ziIWogKzMS$S52gShjWhmx=luEin;ZAef(Erec$^!LkxooF|@WIx#I zSMkp9e@Q*-EHkdTs}t7|BDS2uIs}1PKzc?fyxQqXTx+0#zD~1z;W1*sq8;TojO#%eZDy@P9l?z`o4$^Y!0J0Q{xq)~}DVg+L|hIy51E!JHEYY6`SPznAT6{n`oNHD z(Atc~HQxQ)`mOy9I@pp!t#(C5T~?Veh90T1S2qxjx};X-ZL~bOe?ZpFx0`$9XCox) z6)y}bc>IDcI5CWxn`s)_u69sAWaMktq4~}&W{x3;my%wr5<7hD3eQ&j@DHAT7I%Fk zOAIqCt5pg+)6G#I%{SQ{TgZFycCg~fLOp`$;_HNRW+>`Dxn;AhZ`_1xGKO3W_i;B^ z(NUpnKPUG$u3zk2H}o!r;{}^!?z_zA&cD=_oS|))ULc?H zNKg08$O(pEO%ZBPDkSZE^J95w6`b-RluGo_n~(f@xTDrLke+G-WoJ0qKFI#k#xtdm ztV^%eb&01$$7go+ok=}pfig{&qF-mo`}F{+zqyjk)=?)%6Dm}4z*-gl@TXwz@2x>c{XxN#sU;{ zu9!_`Zb&l*h3P~VA}F3LAl6WQ#>f3O^F2eh*ozJ{;7{LozFxdZci2Q~sBW|Tsrqjh z^WaEebKSI5Auu@MnjC-?H?$Z1LM#mJWM&p{z^~WxHMcfZuCQ>Yh8s zALHAmsLAJzZ56Mb336LJ$~*^6uS z%ao-t(>TEp={YigsdiSm)pZdP77y=;3eI7gtdPOM=55B za7T5t0EW_(11LK#{+>Lr`>sxp0*>1$x?2nXU`p&L(C4InbF~lqXd7`T07G${8!wf! 
zy4Y)Klp=o4Ae)iE7Ej}@TOgV7Hzdvx#>>D%=E0V8p+^ZtQUT-Rb_0n{XN#laPrWs0 z-{CBIjOS`pv&a;o#ykvzfII0QYH`=Jp2YX3$99U(&f~f3TUm$D zUE`>#3MBIh%#%7q=C3Lv!lwdupd+52&#Ce+lZzC8II~8HF)3snsQlgPWo~}(rEPcg zl_vSLKURcDID6kUrv$Ukz#_~cP9j$l(*p@dYv83_!!SD(ZrfdS-{W=Tt5@ZyPpm|v z^kZ^3OPuf=(6WXJ^j2b_1XUSSjyipvzUZwRhuLmpiss7VA>$wQ7S=$EZKL*CRV>PP zj+RYxH&%HfP~?Pea%#BJ+Fv&GzsJx29kSs+38ve!yHG{fxGS^gyC1orpfD;U(e;_U zS3~fw=g~YCI&oE^tL|2+b3T9q8vz7Z>@7=GrVM^2o>37RLi#AolveQ6wIuj(1?`U4 zn(c2}rS!r^&dahDb12{rn62fQ4nk3AU7#KF3iwUU_1uNRSMhs3FeDmet{)hc?2^36 zn7#*nGM<1QyxH%}|E&F)%Fj%Uev)_}w+6GliDeStMyP~a8Lz&V4-ur{@`#p?Uz@bw z-~S-|()Fcreq?ghxz%O+;5|KkSDkd;?iL4yX};~?qww6}&SxMsB8D7<)&vOq)yZnY zNm~Q0T=yvhjc}_1ZuXI2(D>j}gni6;~dY7~+eQ|!o-o{BpI->^2MzS)n z2+54My4QA9YUd)ncZpo~tkV?7d=~3ARCXm?Iaj{Gc*lRIy7;;@CnCSWBxU+^>OBKp zP6|ZAA@cdDcAiRLKm`MI$L+!rTq~b;Ug{Qw^8Gz0>=oFnEm1;7 z;iy~2q1Ds$fF=@dE0QfgDslPcvX=>x=6;qW+FNxUD z{s2*ctgX-;CsyG%PgknY+upvQYN{xWsk(oaY4|F?)*&)fC^Kl4BtFXX#tk3ctpydQ zf3iXxU0y%Dy4ghDV_);9s_Z@6HW~{6i!e}JtX2hboCau?1t%RM#meqSuF<>g9WYDV zF!q{$t#tO2X)7}RLGfV?1s!UY$ZFClEdQV(@JiKRAVx1}+j3BaB55 zJZS5St3|NA$&U*Br&o2d4Z(J*vS|0^&c!{m6CSGkBWi_AF4^YEc-IG=Bflh=ztYsO z&;7N_g20B}zp)V`QU#!tnOsXJ@JQ$%BO^F^BQ~@oNP<8=hJ$yRKY)O=`#D5pDWSnL z;+4n?3x;#}6&=(A_GQ1cX-mOCtMPq~g%2K_b1!>|Ew4-D+yu$SCbmAvE222bNcz|e zl5B`5J!>=O$=1e>(Ky>Ye+trsidg>OkwT8uK_bu;8FCR}Yv0Af9sG0gUc`+c{{eUS z0WZM3gxE3nk9$&BI0}}!v7wzhY8QxW-!#U?t9x+LWlHKXYEbPXF3_{=Y~Xev;G5qO zj2sA@pn4;BBW!9|$*=)%7grqi0_OQ^yCxrBA1{WqMYQDh5YzJL`g_(!Yhuwn7W^Ne zYIRVujKlOg>|iKRUa+jD_OmrO=?2p5R|@pV$YZ|eNcR$>pS7KM)n?IrD*FtPUl1QF|w8jMLH~hVFWfHW`wiu->wra_79^nM7KoN3Mh6yj@|Hh-aG9$iYqA$2l#L>Qq)X z^L(s6f-C67@<(6!iY>UVaHnkssX7%<4RimwmqNV1lV9U-r?B;mVKlw?aDmv{Y?hQ=E=VrvFqE zBt1sGpQIoe>XvZlm|zmBUUqNA`5nPleb-LB`m)aJpxNRzb@gASQ`N1#~?qp6zs;j4rekcUo>UTc}k$)uXCSscZyYeXB_*e zn{rr|9TMpS(ce~TfOmTbh%(x#J!1q|Q8p{EFnW*w90IMP~@q8m>} zKU;Ws#Jc22iTiLS#SGl@8rYn*8D6r0vq5Gfi7u`m-PxU!U=|yA+saj?KF7Uequ~qd za9rtC)h+Pqmx&yuRIIb~f}I87C<#BH#$89Iy8*45Ofn5@1RDxf&H-KW2;`vVE<)_? zpo0qzS9Y9@5|GBh&p(PS<%(Jx?@`_X`md)%nngkx!|)dF1`T@a_s}^icYA)#eyX^YiJ+BNQ|S^w{WmH<~oXglTd9hOMLF#9)eLSeIXM&{%usFy*s5IIu*a5 z-NTxe4ME%oC%*U^@Yw#r6>a{}y~F^d@Di{uk=;f|KJ*dU3OB3>tDpGdo>5*=UgEXw zAd|kwPyUYf`Zvk3<~-|pz8R>n?O<2B^jKaYBa|##>^jyiguc^5)08p(UU$>3R&wGQ z_uhsRRXnkLkg|gU;FEz6m5<8U(ZfEMm{~*X9~`yt<{;FZ@BxQyhr;rjA2SCX9VZ@L z`c%<$D128>xfggl4JL~;V8ie9=^!Z~B$ZRe!R-C}_9LTxl+P)1vfd~lruM%xIHpp2 z_tsYo#m>&|yq$@YlT@vcVt%6(&)Y|7tIlo!kvcz*O?FV#!i5a8Qn|!jJO6y`_J@!V zMm9i_F8iEL7&C})9wTm;>J->pFe~s>5N&bitOdV*yVc!3A0sj9w);lfaxsthd~6i0 z`i~MudFU}tGKLSGFdn~D1*Sh-Yf{O0`c!gc>aoMIrH%(fW!~H8qHK)h zi-d5t7YVz{1)|5nvOCv&Ve92|vlARtu-P z%B^!_T3Y6-L%O|F&|#+el| zq6;&pH_u8~_Pb68`iOn7A8Ee%*ihc-Ky%|N$5Z(fMcy~5pg%t0IXKHyJ6w0;UH+yM!gmad%bG-7GoLSn|7v zEp)y|dc6_vnIZJZD-#mk^9nD^6_TfTS}$H4<0>XS!_+#YqrI7mv=T-V)1eHwZcgS+ zGNalQ_SYRBtJPOe4=|Rono}~1Oc;)|;CULp!}s?6ly^;i9>YFsFvtAVllfO~-Td@X zLNVENJ~kAqO?nTdUZ*>8cRgpBx#!ZX;gf!j^ab!XngHy>o0Xw zNVD+;%31qDBlfNA=ETr_bfu-QN6CLYJN@q$!3YurAhvoK)rn)O5U81X*MRD+*bs&D z%;;Xm!@F6tRP8Szxgv{|=rV=c(oVgatoH{pbT2(#mO9DL^Mmj&1je|MSTg9m(=NlK zxqO@$_6`;=w&jGsFfcmnB8f9`9R@Gx3noY=wk=z3(Lo5ce%5RHI6pmQ&LVZEVS4?P z>{-sB7BwGpEYW8$O#bhBdU;(K1|L~c8LV;qtAw49mo0PI$k$?4sI(~j&3#kpgXK1- zq`6QU>&M@L8XVx@1z?>KYjlEjr$;Jmzzkgc6?%?$H}MDw|} zShwr@FH*H-Yeei}r@5s6k7R!-zyN|$%~h-7{p`5njW0a`@+-ktx#p4=NesQ72>+4N~VE*7n(KWzww~ z-;&yZb5#^LHo4@6U><}mcTRXnD%+qB-?KXB3BRz9PPsG!R! 
zbvnhVX3Mc;m`-hNj?H6hhRb3|*_38c<&;~Vt*S|>2a@}g29qD@cZ3Dq2|LBwLjay= zFE!BjIJKaDgqzgbH1DXs_8B4KFBmD)krVdn2LJfYd&o+^mDTt1?Jm&wBX8tK+FN5) zOMpRius+p{WfrW?xX8(ALa;2@rvt8Qx60nxdfeHi;Gcr~{Tjh2Nb?kWIOugKke}(q z*bDHKAdzeWL@<+M#8=PHGGSz@;LKceE2p0-_#@5sgAI4kJDBvf+)#k~qJ&1^^21r` z%vFXzCq@?*O^~D9VW!jToQ5%q%=bMX(zW$IpB2(fmN{?Fz40C9Yo8{M+&GQ5uipIe z{h>`lI%v23CwSWbh+_7?{O|uQ&g}n~^5egI@|MF2Xjtu#Wad+Vrt%md5rHJOnt81F zLJ?bv>7!YmPfxmDh9$hl4y?olVk1B_t>R0tpXZm zh~+{VYgcC)DboHxtS-s?4(PMq@acZPZXKX(-{0eSaF70XGys2XXLAz$lg1x2JO}Y& zdNZTw3lEgCyJBnVrMr)p;Sf-rmAR?PzRb-_If1I>OS|QW7nkq2+M`R%7D+;&ca9OD z%|$(C%ir3W`DQ+lPP{rXx&QC^?(`e?{JeKzJG=pDUhF@T=CQ^(!c0Cs>Lc)shMYnp zIl6TxSAS?NcPA_|G0p82Gj{oWT@*Fr>eB~4wV(Hdk$Jv^BF71CGI(`v4~%AO#uC29 zTyBSo;-N^qKw3NFar*{>cr7Er&+GpC2Uw(6`PXwr-%p=VoP+QpbeVh%Bt3{K2!8A~ zrx+fAQ3b~U9K$F_%7ZW?ol1n&#@w@zL{*#W@2Z-ctdmQsh26m3)gDLAXfL%>svFf4l_M8I^th@O}qH3i+e66KEynnh0sjtx+I=5b$z}*^0DG zigyedC!rtfwd}3AjsyKEB_Ak32o}{(VuG54$+xBUp;|kA#$~PjP za7-lxTW7imyk0|ez07v7&mE}FLGUvZe|9S9WSUBLmwVK~eKU`EY&9MUDRNX4xTK?I zh0oX9x=Kr9oM4u*U<=S{{VqZr*SLb7SVuQAPrw20iA~;Q%klK0ZUI7umbdS+%y!Q7 zS(le1tEeTAd8@5VVkck#)0i~8fY8_RU1}?g%HmETX*O7L^bLNH!uiw^s%h$;H}-n% zk-zjfIHT-cyXDKsTLh)FNX0=sC1$L)$ACC!e8oUoM}Az;n5pb_<3}} z2P58i`XE6LMh++bjB*7xhW4PJ@8yf}??Zx0|C74@d)K?(ab9OTG=1Y$dhXMRE$~wQ zofl(a$re-Tydgo?ZxM;diuybK`aXcds|Lqiiv4|Rbe`d%SeIc84xmt$G^wyLqqO+oEh4zB6L(9qF>+62q=HQwPtQ-j6I*MzG3a=22iRMul7RQh zM~rHY^#K~C_hZJug8E_j{;OSXai8vPpT!t;q)Y$Su=2j(x^pRY$<7kTMsh0g+tIr? zSy9vn6vNFL#5N;)DX|;#VrAXgB-_*zGZ}h!2mU$F$!lYGeHwe9=GM{re2k# zaEaGXTp;**{Ja)@Iv-2B-*KGX$lrl!1+&xT9@Pbe-#U0rjxQ!`t98->%l?7tNdK3`IUa zp6?!oY{q?|n91-xynos6(-)X70VRV`wPfAIbm8|8vKNPen>*thqbo(wi9Mxs6;%+)S29X;q;vFDeQ z=&XZ@@YLIi9z5}n(^U$2WX~}+&cu@@W*LJOOzQ0Oq)3GWOs`p7R5n`@i?V8h=XHA_ zbpiV_m%k6^z7~YmKM800u?seWsz_^AM-Vit;BpV1CwFHhuQKe6bIVf!~j*@ zC|fgg!_0V@%ZEREyl|1>d-Nlh7v|AFDMG^Pe2;l4qvi!^e{f0L0n zr^_wD-s!PFX?*_7Td=I2gl-4bza|kDGPs0sYj_R!1S@4^JagUd0^Ot>uRO30+XYkx z7b-jl&s+Bf_;B(Ctp>{nOK;!Uxzi^t$#76L5a!-w$_mExZD}t>sZhHL7t)Xjk^H*w z@89MJcD~RUDySV2FOwWgwA^NXyYZBgTRgjl0`R07bxUnzs^9sB23mij7FxJ?S~*uF zaptS5iA=_WEL-h^v}BdjlTWLWFJ2SN)o1&O8;tXG=n{aUY_4IJAmH(=eJtJ^5WpiU}O$SHHaXVWMLN{{F6s&dIqK2Dv! 
zdSa9D=!^OYmRv?Gcmk8c-f7Av@DUFW1bZ47}JzlT(v*J6t1ZrrYnh z!neo%fRPhOb%+jB^y3aF?IMt^xP2vVw!g(ZmbkDq=R!9LKjmb6SBSHG^js;~-&bTu ztjhR0a}jLSwoF&L;*5FM6Cgka7Ej@<56n!OXwB66i}Q9YRI9gNvBwH^p+v?b|C1sA z?R@1XHbyRIU#~yPA~5h5*6y-HJogI;ijn3HleV!BgE-0%WHTe!u7yqu4gTz%D@B(A ztzNxM?etAm_Dw8+A8gLXqp1RvrjS37{bzvGJRNI5MaI&X*^l;e8#~y)P6bx~6FBC< zSO+dHR|vz4J~6`8X1k8vW9!W%Y?~^t0se|M!p%+AQ_r?u?BLB%g6!Jw03L-c4>~Q| zbM*HJ3pR@{}2#@%bueB0dAb7D%y{b{;!W&ZqS7`eG2w5XjFL#h~C>YqKsLWMUy zpeMl*q8{-+yI%{(Yb)LV%twmzb4{jM*$oSD>W4;jpG*6f9WpN{tJJvE0$HIX>PlZR zciwS@7o5f=zTdrCG~z1lJN*Zu{dK-=hLiU&7fgP-xD<^VSnF7%<&gE|D{rCeZMR7y9Fxu{u@9Cqd~n5yl- zmT?k?{aFRr85T)(H^6_n0is6&bW!dPBo~{+hHOP;F{8j!m||Nru|Mk)2(+gl4Ws}R zX!I)rIfJk|*-2uaLx5;VupTfLNt;VRmuDcKI!0IpulI7k%?HL7!_9WJT|kkSJLR=I@CRsJ(Pc^%!~To?{G_I1 zEvjvYI1L-z5Ey68ofWXmW?)&r2DlZ3%?;)s$VU+n&TcGwZ$4MR z15o>z;Lh>#o+J^ht1z@H!wu1>pIun;mU(w;(XMHKuJTEh6^D%@ht-8Vx0PGpY;3Ii zRrD{zhnwvGZvym>A%%_V|ACkQ#1oTeUE~;DIul4?bkg1}Ot#FPt}#ZC0L6N8e_ikQmzhXwe~vj=qwm1>)6UfMclrPQAl z^|nh%-#_*~^gl-)Npm1?L>L(YS^wSkbd^Z1_|=Zr^FSqs{~9pHtPPAkSl2v4)r*UB z|4r0u-JHH0V(p^hm)2WhT#xZtkHz<=lE(6mmPIgpe$=myEoFNA)&EcdTV6A;tv97( zRxFuFN?crX2)VpIxV>cJ%MYiQyS{rHu`F*7#Q!ii43it-`>hGEY_v;+ab7Zj@1PhD z8<BjN1(ke(_RM?`%E0?B3+)uZb*uH1V#9L(Y=F!-mr(DalYnepL zr4eB3S@Bt7vzHDBUdII#67%rS)+8d%GI5w`*{UEhs+(Xw?Nzt!$J)h%6OyDXOhEUi zt^!PgjqR*Y{;DB^$VtN}1&_@-w81A887{HifVgGSY^@&s`}=YF0$!p1uGDegb2^ZC zYo!$77gb`(yULcCsvApF^vAQyt;+(zRSX}bH0BW6fUcPoy|#1ss{MG=nKyJp>F)<_ z9B(1+^jVC@#0*%cAb&`Kj(*r5h?^i`0>(H^gSQCZKON9)@cA23rzN~{A;^2kxpwY~ z5HG&ND_^aoBQ(Zb;_TI{KzAccDWz@i1@-M#!&OJgYHzz$zVe*sdz7;S6o8+@pK}Ed zgI8xqB`1}=H;yo|O5R!QRfaYnd74o@)@11&G~xdK{pOv-sje))ZbT-ZbW#XL7c)eH zV#x(q$r|W5SSqhPJUFEB%?WMJtj;yboSAf>SYI$Oji?{K9e?C0R#=(ms=pRWNu3K%okZ>e6e@1NqAy zp@kvSMf>hAG+ee(y=;H~-A)KkFvJ1=-y4poe+u0V%GCddT?+nt{qP%){>k`3m+uuQ zrGim80%saKac!TIp{zsK9JnvnW4WhFwm1OYH$s>@j0YX|$}0ufd6FaUyFeK1&%blc z4W>F*Ae80Y!4lwp1a6ifHRXTlU`v~hIXQUceb7P>Z+F%8qDY01*XmQIZ}=!*VklNv zavqj;6}AtJ31p$#awj}bqJ?L?X>dwO1-L>;!7o84+?J|Q;_SO@P8_Zu%RGc#Dhpuk z!?bWAOhbx<3&rCep|;6d_f2&b@>(&=ob;?K!+qk3VPmOFGvyA`qOYpsO!L9 zPi^x{>b+QZ(l{KfHpNY6i{pNvA4{+Rkl>md4h7le-3}nuwf9YPQCQkV{GdgScTqaG`)zgE{hW|bL+nnE zBa<6i7~3O9*EGd_%Ri!DQu@c!829N#OZm_xyYgS&06VI=M6L|~c>ThUHu=&x-9uV} zS&zF8MmmsQB%SML;#fjlkkQC;#h8ZV{&vIbr&lk~B>GTWJ&N}x# z<@qf&R?M`B9z$5%x9h$0?fnVcUGm~FS2q9V1k6JYOV+^TA)|mEbd}aOl~Yvy#XI1c zf1m9JymI8wZRwh+vZjHyqjzOOyI0pj+hRF;^P&T4}rxc=L`}Z;8hJ9#R~smG1-ZH|I0_qx4oB^p6~EtdolTFRg4%WtQftMSyyCIFcW%B z^*}@Q*|}ObYmYsSA2X!RN(sy6NV`cJIaJ!f(*wn3u%IP!H}??&BY& z_W4*!lyy?SKHGZ-CH^W#Fbj;;HFYobR*c)_@f~SIaYPzj{6UkEJ_SWUda+*wZx#N-wvS}?1u=qWb zXA(y#w%&Tk=Hpi-L(jKMmtN`#EQPSYyy}h+=^w{dW0#6)a*S$PdQmvn*j>Gp=rOkK zch8v|q;-k)qpmEJk2k$fFn!b7V&PnB^Dc=sWAUeZ>;-dWG5v`5`b+&R9cCvs2D)3f zWHNWJvy*WeQua%Jz&ny=NxI27>mZHD6P*JQ7xHm|_f}y5a!bwzYaYBSH+q90g}$0c zn$J-hu|Kh{&f&9f9{5g3JX)>UKmH@ydSxc)$TURy79w~6o_f5dS}<305mu48M5TyK zq@u=I_5+79KT3(dIp!!gnQL$=O|$dbVjs)YPc70-$J=H2#aIv>!3bf(>2>6QKaiz4 zy2^!+Wq!{e}=)@^7xrN-b5;h(~t5?@|KkpvJs1029-(J`DH=O9Ey>EY+9DLm7keyRE z)9iL}tW+=5?n-m>uwv8Tvu51#+%rO&%+F82rg>a0IR%lgduc|hK^`=1-hrl#IOMW# zhY(#?ioeog;Y!L!wH^!p{WQ~kV)M3u)0GwxG(MYK-h z>2qCqo~gGPof?2C`f{MP+^?oC$8l_}1JMIzOSD}chPlFI*TbQNe?Bv{8*`aYq$H-f!6!bRt z0ZG@-v#KnEGR>sZPwVrX{)aBb^)|1G1OIUO_^Zss!v<}I7I0|#4)WET1eONKmgkS+ zhzo4+m+&#*!ZFKj!7?24|3C&^5H%w!`^?$y4;69{)#0&oO@4kpZsY{dl|JI(Tkq@r z{SB{Pb-B&2_?oX!eG|ao!aca%pl1l9Rvvi4ziHUC$* zlHx6&#^kWo>898d!C!X2DdS27miU;Xpne52k|=(#>T-&pXJ0p0fq_W$Z^D+*Kg6*BQ?Sy#|mTg z3bPrtV`GRrHuqCB-@8^>J&pgZ#gEF>`&-yaV|;^;J24FCD(wjZ3n5U+1OVL~w609>L^S&u9w*Kb3PQnOrqn6wBq@*n^_x08-b2J}|7c&JD26om- 
zGxP(fi(@=1EI<-{#C_RUzvXA9m*?{^vtt*rQye(#7C`*W@V1HWlm)Z0oev zX8K^WsLqqfo)Cq2BjonZtex97_{&yl@)Vr;hw`qQCtme>7orvy7AIU?f}MH9;$PuA zJ5l>hSo|zH$I1Xvk}8Mpz=xGRL%(NmSCIPLdy53!m^;Y6`&((RCgUGT9_mVX$|uIT z%0F^=m(aOL|2FHlRVVc;mD4qcE4wak4338rEvS;9C}x=nG?edNb2dPyhqkq{ik&5f z@4AVj3&l+#S#wymqbO`-Z~McpyVeQv8>>+3IIuT$!AMyfh`2K3A}_A#ti6_D4B%%x zmI<}}`N^LhnYYa~87*vJXTE;x5=CIWO!hN?52C5N4l>C~KQ>yDO}=-0=r!K)TJcpO z!}qVoA3`+uIe-)?8w7AFHNtYhKB$jCwZSw;cp-1q1dQK1QW>tC{@h{_R-?_vpOlA_+b;RH#_63yDN2I_#$mLJw2|y8?i zTh1YY^n*ROd#XCPhxb*rC1&zXX=FQ2sA6sZOIbDU{!tanqi_O(fb z3)D6@7%afP26`|FJnvjXsN2heRb}N?{&F(O6QAFRKNVZ_4S3mb-3p8R=4}buRt8KK zrPYVdqVW|t-hC2ni^u6Q^v4*-xS;~d@xBT!I0a2gF9?q{yp1 zk%^UaOm5uX!k>f~hd058-V#>rIO;iEZ-YwitGiBKPrfj;+>*joZ#p84%=@I4?um1j{Qz-3})_o+89O2u0bSk$gA^vA&@ znkWP#Wl{o2Zje98@o*)H66v{!?aF()7hZ0A?8>7J*_7J~?jH*|Po6(pG^dz$Aj#K0 z=ac-8;|c#W&~TE$E>4zZ;AqP5gEtyD>UgNF@bepYe=EpP%(2l@8!(+jF?4~7{=u2{ z?z`XZcE44(#rZdg&s$r6qXPMVo&5AyTlN>%zQO{>VFxdKzRvD_t>rJw@*U#E-#seW zR}KG>FIW(qbYqN`vvm@L+pu9~@uRqaTVoq+R)8_4&Nzf)#{J#;drUUp3+J?tAmvXj zAA^%+gxE;`5`2`u`Ay&k{Tk``fAadDhJ*aaVOrQ}o^KIiC`{u?oI`hvtsWUk+lgzH zU>`IPp&c8LA@dIg8|*%MyD-VH5QjWC!s+Kv78g}Xz{sFwttSaPvtGIlf4kX9)iWO0!?e3 z_?d})wkAJuyvxKK9N%h~xyinKk>F&OcCR?tx0~-sxObEva{o4)w~w7H$Y;RN#)1(& ziyp?j3~#dIMfot;<8tf*j399`TdU%sSEuZFf&9_;f3Xf$)p=#q6Bm3)1EBcEin#ew_ zyPK6x#5GmJAqxXekPR>WGgEyU&F$ViV-Os99Ne|S7g#dL@RR*24|37&0zm7=vK@66 z*B1B7>p6el^{&iVstuO5igc;J7{`Vk2_u`y1x` z;;oqec=SJd{oi21cxC9lstV-)NjGz>wI|Lp@fVXE0a-|Vzq}U={#f2W+{ymfnf{ju z_+wN3-xGcRttL!$q4lu1Tda=JU=XQr`}42`@z)ZykD>A~S! zZyor8_&a2u0EK?mH9PV!{N1h}vd$Se%-@31vv_{~e?|-aVRg{&tiS&$fB(IR`l0^- DLO^x_ literal 0 HcmV?d00001 diff --git a/docs/source/index.rst b/docs/source/index.rst index 8ac09f6988893..fd741ea5e9766 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -114,6 +114,7 @@ Documentation usage/engine_args usage/env_vars usage/usage_stats + usage/disagg_prefill .. toctree:: :maxdepth: 1 diff --git a/docs/source/usage/disagg_prefill.rst b/docs/source/usage/disagg_prefill.rst new file mode 100644 index 0000000000000..9fe714b4fd856 --- /dev/null +++ b/docs/source/usage/disagg_prefill.rst @@ -0,0 +1,69 @@ +.. _disagg_prefill: + +Disaggregated prefilling (experimental) +======================================= + +This page introduces you the disaggregated prefilling feature in vLLM. This feature is experimental and subject to change. + +Why disaggregated prefilling? +----------------------------- + +Two main reasons: + +* **Tuning time-to-first-token (TTFT) and inter-token-latency (ITL) separately**. Disaggregated prefilling put prefill and decode phase of LLM inference inside different vLLM instances. This gives you the flexibility to assign different parallel strategies (e.g. ``tp`` and ``pp``) to tune TTFT without affecting ITL, or to tune ITL without affecting TTFT. +* **Controlling tail ITL**. Without disaggregated prefilling, vLLM may insert some prefill jobs during the decoding of one request. This results in higher tail latency. Disaggregated prefilling helps you solve this issue and control tail ITL. Chunked prefill with a proper chunk size also can achieve the same goal, but in practice it's hard to figure out the correct chunk size value. So disaggregated prefilling is a much more reliable way to control tail ITL. + +.. note:: + Disaggregated prefill DOES NOT improve throughput. + +Usage example +------------- + +Please refer to ``examples/disaggregated_prefill.sh`` for the example usage of disaggregated prefilling. 
+
+
+Benchmarks
+----------
+
+Please refer to ``benchmarks/disagg_benchmarks/`` for disaggregated prefilling benchmarks.
+
+
+Development
+-----------
+
+We implement disaggregated prefilling by running 2 vLLM instances: one for prefill (we call it the prefill instance) and one for decode (we call it the decode instance). A connector then transfers the prefill KV caches and results from the prefill instance to the decode instance.
+
+All of the disaggregated prefilling implementation is under ``vllm/distributed/kv_transfer``.
+
+Key abstractions for disaggregated prefilling:
+
+* **Connector**: Connector allows the **kv consumer** to retrieve the KV caches of a batch of requests from the **kv producer**.
+* **LookupBuffer**: LookupBuffer provides two APIs: ``insert`` KV cache and ``drop_select`` KV cache. The semantics of ``insert`` and ``drop_select`` are similar to SQL, where ``insert`` inserts a KV cache into the buffer, and ``drop_select`` returns the KV cache that matches the given condition and drops it from the buffer.
+* **Pipe**: A single-direction FIFO pipe for tensor transmission. It supports ``send_tensor`` and ``recv_tensor``.
+
+.. note::
+  ``insert`` is a non-blocking operation but ``drop_select`` is a blocking operation.
+
+Here is a figure illustrating how the above 3 abstractions are organized:
+
+.. image:: /assets/usage/disagg_prefill/abstraction.jpg
+  :alt: Disaggregated prefilling abstractions
+
+The workflow of disaggregated prefilling is as follows:
+
+.. image:: /assets/usage/disagg_prefill/overview.jpg
+  :alt: Disaggregated prefilling workflow
+
+The ``buffer`` in the figure corresponds to the ``insert`` API of LookupBuffer, and ``drop_select`` corresponds to the ``drop_select`` API of LookupBuffer.
+
+
+Third-party contributions
+-------------------------
+
+Disaggregated prefilling is highly related to infrastructure, so vLLM relies on third-party connectors for production-level disaggregated prefilling (and the vLLM team will actively review and merge new PRs for third-party connectors).
+
+We recommend three ways of implementation:
+
+* **Fully-customized connector**: Implement your own ``Connector`` and call third-party libraries to send and receive KV caches, and much more (like editing vLLM's model input to perform customized prefilling, etc.). This approach gives you the most control, but at the risk of being incompatible with future vLLM versions.
+* **Database-like connector**: Implement your own ``LookupBuffer`` and support the ``insert`` and ``drop_select`` APIs just like SQL.
+* **Distributed P2P connector**: Implement your own ``Pipe`` and support the ``send_tensor`` and ``recv_tensor`` APIs, just like `torch.distributed` (a minimal sketch follows this list).
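+
+As a starting point for the third option, here is a minimal sketch of a custom ``Pipe``
+built on ``torch.distributed`` point-to-point calls. It subclasses ``KVPipeBase`` (the
+base class used by the built-in pipes); the fixed shape/dtype handshake and the exact
+method signatures are simplifying assumptions for illustration, not the full production
+interface:
+
+.. code-block:: python
+
+    from typing import Optional
+
+    import torch
+    import torch.distributed as dist
+
+    from vllm.distributed.kv_transfer.kv_pipe.base import KVPipeBase
+
+
+    class TorchDistributedPipe(KVPipeBase):
+        """Single-direction FIFO pipe: rank ``src`` sends, rank ``dst`` receives."""
+
+        def __init__(self, src: int, dst: int, shape: torch.Size,
+                     dtype: torch.dtype):
+            # Fixing shape/dtype keeps the sketch short; a real pipe would send
+            # metadata (shape, dtype, a None marker) ahead of every tensor.
+            self.src, self.dst = src, dst
+            self.shape, self.dtype = shape, dtype
+
+        def send_tensor(self, tensor: Optional[torch.Tensor]) -> None:
+            assert tensor is not None, "None handling is omitted in this sketch"
+            # Blocking point-to-point send; assumes a CPU-capable (e.g. gloo) group.
+            dist.send(tensor.contiguous().cpu(), dst=self.dst)
+
+        def recv_tensor(self) -> Optional[torch.Tensor]:
+            buf = torch.empty(self.shape, dtype=self.dtype)
+            dist.recv(buf, src=self.src)
+            return buf
+
+        def close(self) -> None:
+            # Nothing to release here; a real pipe would tear down its
+            # communication resources (sockets, process groups, buffers).
+            pass
+
+A database-like connector would instead keep the default pipes and re-implement
+``LookupBuffer`` on top of an external store, preserving the ``insert``/``drop_select``
+semantics described above.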
From d263bd9df7b2f5586910e5d006a11ff11ba7c310 Mon Sep 17 00:00:00 2001 From: shangmingc Date: Mon, 16 Dec 2024 05:28:18 +0800 Subject: [PATCH 06/72] [Core] Support disaggregated prefill with Mooncake Transfer Engine (#10884) Signed-off-by: Shangming Cai --- vllm/config.py | 7 +- .../kv_transfer/kv_connector/factory.py | 3 +- .../kv_connector/simple_connector.py | 101 +++++-- .../kv_transfer/kv_pipe/mooncake_pipe.py | 272 ++++++++++++++++++ 4 files changed, 352 insertions(+), 31 deletions(-) create mode 100644 vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py diff --git a/vllm/config.py b/vllm/config.py index 37d062f7eb079..fce8011be4015 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -2171,13 +2171,14 @@ def from_cli(cls, cli_value: str) -> "KVTransferConfig": return KVTransferConfig.model_validate_json(cli_value) def model_post_init(self, __context: Any) -> None: + supported_kv_connector = ["PyNcclConnector", "MooncakeConnector"] if all([ - self.kv_connector is not None, - self.kv_connector != "PyNcclConnector" + self.kv_connector is not None, self.kv_connector + not in supported_kv_connector ]): raise ValueError(f"Unsupported kv_connector: {self.kv_connector}. " f"Supported connectors are " - f"`PyNcclConnector`.") + f"{supported_kv_connector}.") if self.kv_role is not None and self.kv_role not in [ "kv_producer", "kv_consumer", "kv_both" diff --git a/vllm/distributed/kv_transfer/kv_connector/factory.py b/vllm/distributed/kv_transfer/kv_connector/factory.py index 015f892cec933..3e2bb436d24b5 100644 --- a/vllm/distributed/kv_transfer/kv_connector/factory.py +++ b/vllm/distributed/kv_transfer/kv_connector/factory.py @@ -11,7 +11,8 @@ class KVConnectorFactory: @staticmethod def create_connector(rank: int, local_rank: int, config: "VllmConfig") -> KVConnectorBase: - if config.kv_transfer_config.kv_connector == 'PyNcclConnector': + supported_kv_connector = ["PyNcclConnector", "MooncakeConnector"] + if config.kv_transfer_config.kv_connector in supported_kv_connector: from .simple_connector import SimpleConnector return SimpleConnector(rank, local_rank, config) else: diff --git a/vllm/distributed/kv_transfer/kv_connector/simple_connector.py b/vllm/distributed/kv_transfer/kv_connector/simple_connector.py index bf4f40ca94e29..4ace03ff1184e 100644 --- a/vllm/distributed/kv_transfer/kv_connector/simple_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/simple_connector.py @@ -2,7 +2,8 @@ Simple KV Cache Connector for Distributed Machine Learning Inference The SimpleConnector transfers KV caches between prefill vLLM worker (KV cache -producer) and decode vLLM worker (KV cache consumer) using PyNcclPipe. +producer) and decode vLLM worker (KV cache consumer) using PyNcclPipe or +MooncakePipe. But the logic can be extended to support other pipe and lookup buffer. 
""" @@ -15,7 +16,6 @@ from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase from vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer import ( SimpleBuffer) -from vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe import PyNcclPipe from vllm.logger import init_logger from vllm.sequence import IntermediateTensors @@ -36,7 +36,27 @@ def __init__( self.config = config.kv_transfer_config - logger.info("Initializing PyNcclConfig under kv_transfer_config %s", + if self.config.kv_connector == "PyNcclConnector": + from vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe import ( + PyNcclPipe) + logger.info( + "Initializing PyNcclConfig under kv_transfer_config %s", + self.config) + elif self.config.kv_connector == "MooncakeConnector": + # Check if MOONCAKE_CONFIG_PATH is set + import os + use_mooncake_distributed_pipe = os.getenv( + 'MOONCAKE_CONFIG_PATH') is not None + + if not use_mooncake_distributed_pipe: + raise ValueError( + "To use MooncakeConnector, you need to pass the ENV: " + "'MOONCAKE_CONFIG_PATH=/path/to/mooncake_config.json'.") + else: + from vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe import ( # noqa: E501 + MooncakePipe) + logger.info( + "Initializing MooncakeConfig under kv_transfer_config %s", self.config) self.lookup_buffer_size = self.config.kv_buffer_size @@ -44,6 +64,11 @@ def __init__( self.producer_buffer: Optional[SimpleBuffer] = None self.consumer_buffer: Optional[SimpleBuffer] = None + self.producer_data_pipe: Union[PyNcclPipe, MooncakePipe] + self.consumer_data_pipe: Union[PyNcclPipe, MooncakePipe] + self.producer_signal_pipe: Union[PyNcclPipe, MooncakePipe] + self.consumer_signal_pipe: Union[PyNcclPipe, MooncakePipe] + # 2 pipes for every rank in the world port_offset_base = 2 * rank @@ -51,17 +76,26 @@ def __init__( # and the decode vLLM only uses recv pipe if self.config.is_kv_producer: - self.producer_data_pipe = PyNcclPipe( - local_rank=local_rank, - config=self.config, - port_offset=port_offset_base, - ) - self.producer_signal_pipe = PyNcclPipe( - local_rank=local_rank, - config=self.config, - port_offset=port_offset_base + 1, - device="cpu", - ) + if self.config.kv_connector == "PyNcclConnector": + self.producer_data_pipe = PyNcclPipe( + local_rank=local_rank, + config=self.config, + port_offset=port_offset_base, + ) + self.producer_signal_pipe = PyNcclPipe( + local_rank=local_rank, + config=self.config, + port_offset=port_offset_base + 1, + device="cpu", + ) + elif self.config.kv_connector == "MooncakeConnector": + self.producer_data_pipe = MooncakePipe( + local_rank=local_rank, + config=self.config, + ) + # We only need to initialize MooncakePipe once + self.producer_signal_pipe = self.producer_data_pipe + self.producer_buffer = SimpleBuffer(self.producer_signal_pipe, self.producer_data_pipe, self.config.kv_buffer_size) @@ -70,17 +104,25 @@ def __init__( # the current vLLM instance is KV consumer, so it needs to connect # its recv pipe to the send pipe of KV producder - self.consumer_data_pipe = PyNcclPipe( - local_rank=local_rank, - config=self.config, - port_offset=port_offset_base, - ) - self.consumer_signal_pipe = PyNcclPipe( - local_rank=local_rank, - config=self.config, - port_offset=port_offset_base + 1, - device="cpu", - ) + if self.config.kv_connector == "PyNcclConnector": + self.consumer_data_pipe = PyNcclPipe( + local_rank=local_rank, + config=self.config, + port_offset=port_offset_base, + ) + self.consumer_signal_pipe = PyNcclPipe( + local_rank=local_rank, + config=self.config, + port_offset=port_offset_base + 1, + 
device="cpu", + ) + elif self.config.kv_connector == "MooncakeConnector": + self.consumer_data_pipe = MooncakePipe( + local_rank=local_rank, + config=self.config, + ) + self.consumer_signal_pipe = self.consumer_data_pipe + self.consumer_buffer = SimpleBuffer( self.consumer_signal_pipe, self.consumer_data_pipe, @@ -260,6 +302,11 @@ def recv_kv_caches_and_hidden_states( def close(self): self.producer_data_pipe.close() - self.producer_signal_pipe.close() self.consumer_data_pipe.close() - self.consumer_signal_pipe.close() + if self.config.kv_connector == "PyNcclConnector": + self.producer_signal_pipe.close() + self.consumer_signal_pipe.close() + elif self.config.kv_connector == "MooncakeConnector": + # MooncakePipe reuses data_pipe for signal_pipe, so we only have to + # close the data_pipe. + pass diff --git a/vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py b/vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py new file mode 100644 index 0000000000000..8e4358672b74d --- /dev/null +++ b/vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py @@ -0,0 +1,272 @@ +import json +import os +import pickle +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from typing import Optional, Union + +import torch +import zmq + +from vllm.config import KVTransferConfig +from vllm.distributed.kv_transfer.kv_pipe.base import KVPipeBase +from vllm.logger import init_logger + +logger = init_logger(__name__) +NONE_INT = -150886311 + + +@dataclass +class MooncakeTransferEngineConfig: + prefill_url: str + decode_url: str + metadata_backend: Union[str, None] + metadata_server: str + protocol: str + device_name: str + + @staticmethod + def from_file(file_path: str) -> 'MooncakeTransferEngineConfig': + """Load the config from a JSON file.""" + with open(file_path) as fin: + config = json.load(fin) + return MooncakeTransferEngineConfig( + prefill_url=config.get("prefill_url"), + decode_url=config.get("decode_url"), + metadata_backend=config.get("metadata_backend", None), + metadata_server=config.get("metadata_server"), + protocol=config.get("protocol", "tcp"), + device_name=config.get("device_name", ""), + ) + + @staticmethod + def load_from_env() -> 'MooncakeTransferEngineConfig': + """Load config from a file specified in the environment variable.""" + config_file_path = os.getenv('MOONCAKE_CONFIG_PATH') + if config_file_path is None: + raise ValueError( + "The environment variable 'MOONCAKE_CONFIG_PATH' is not set.") + return MooncakeTransferEngineConfig.from_file(config_file_path) + + +class MooncakeTransferEngine: + """Handles the transfer of data using mooncake_vllm_adaptor and ZeroMQ.""" + + def __init__(self, kv_rank: int, local_rank: int): + try: + import mooncake_vllm_adaptor as mva + except ImportError as e: + raise ImportError( + "Please install mooncake by following the instructions at " + "https://github.com/kvcache-ai/Mooncake/blob/main/doc/en/build.md " # noqa: E501 + "to run vLLM with MooncakeConnector.") from e + + self.engine = mva.mooncake_vllm_adaptor() + self.local_rank = local_rank + + try: + self.config = MooncakeTransferEngineConfig.load_from_env() + logger.info("Mooncake Configuration loaded successfully.") + except ValueError as e: + logger.error(e) + raise + except Exception as exc: + logger.error( + "An error occurred while loading the configuration: %s", exc) + raise + prefill_host, base_prefill_port = self.config.prefill_url.split(':') + decode_host, base_decode_port = self.config.decode_url.split(':') + + # Avoid ports conflict when running prefill 
and decode on the same node + if prefill_host == decode_host and \ + base_prefill_port == base_decode_port: + base_decode_port = str(int(base_decode_port) + 100) + + prefill_port = int(base_prefill_port) + self.local_rank + decode_port = int(base_decode_port) + self.local_rank + self.prefill_url = ':'.join([prefill_host, str(prefill_port)]) + self.decode_url = ':'.join([decode_host, str(decode_port)]) + + self.initialize(self.prefill_url if kv_rank == 0 else self.decode_url, + self.config.metadata_server, self.config.protocol, + self.config.device_name, self.config.metadata_backend) + + self.remote_url = (self.decode_url + if kv_rank == 0 else self.prefill_url) + + # Initialize ZeroMQ context and sockets + self.context = zmq.Context() # type: ignore[attr-defined] + self.sender_socket = self.context.socket(zmq.constants.PUSH) + self.receiver_socket = self.context.socket(zmq.constants.PULL) + self.sender_ack = self.context.socket(zmq.constants.PULL) + self.receiver_ack = self.context.socket(zmq.constants.PUSH) + + self.buffer_cleaner = ThreadPoolExecutor(max_workers=1) + self._setup_metadata_sockets(kv_rank, prefill_host, base_prefill_port, + decode_host, base_decode_port) + + def _setup_metadata_sockets(self, kv_rank: int, p_host: str, p_port: str, + d_host: str, d_port: str) -> None: + """Set up ZeroMQ sockets for sending and receiving data.""" + # Offsets < 8 are left for initialization in case tp and pp are enabled + p_rank_offset = int(p_port) + 8 + self.local_rank * 2 + d_rank_offset = int(d_port) + 8 + self.local_rank * 2 + if kv_rank == 0: + self.sender_socket.bind(f"tcp://*:{p_rank_offset + 1}") + self.receiver_socket.connect(f"tcp://{d_host}:{d_rank_offset + 1}") + self.sender_ack.connect(f"tcp://{d_host}:{d_rank_offset + 2}") + self.receiver_ack.bind(f"tcp://*:{p_rank_offset + 2}") + else: + self.receiver_socket.connect(f"tcp://{p_host}:{p_rank_offset + 1}") + self.sender_socket.bind(f"tcp://*:{d_rank_offset + 1}") + self.receiver_ack.bind(f"tcp://*:{d_rank_offset + 2}") + self.sender_ack.connect(f"tcp://{p_host}:{p_rank_offset + 2}") + + def initialize(self, local_hostname: str, metadata_server: str, + protocol: str, device_name: str, + metadata_backend: Union[str, None]) -> None: + """Initialize the mooncake instance.""" + if metadata_backend is None: + self.engine.initialize(local_hostname, metadata_server, protocol, + device_name) + else: + supported_backend = ["etcd", "redis"] + metadata_backend = metadata_backend.lower() + if metadata_backend not in supported_backend: + raise ValueError( + "Mooncake Configuration error. 
`metadata_backend`" + f"should be one of {supported_backend}.") + + self.engine.initializeExt(local_hostname, metadata_server, + protocol, device_name, metadata_backend) + + def allocate_managed_buffer(self, length: int) -> int: + """Allocate a managed buffer of the specified length.""" + ret = self.engine.allocateManagedBuffer(length) + if ret <= 0: + logger.error("Allocation Return Error") + raise Exception("Allocation Return Error") + return ret + + def free_managed_buffer(self, buffer: int, length: int) -> int: + """Free a previously allocated managed buffer.""" + return self.engine.freeManagedBuffer(buffer, length) + + def transfer_sync(self, buffer: int, peer_buffer_address: int, + length: int) -> int: + """Synchronously transfer data to the specified address.""" + ret = self.engine.transferSync(self.remote_url, buffer, + peer_buffer_address, length) + if ret < 0: + logger.error("Transfer Return Error") + raise Exception("Transfer Return Error") + return ret + + def write_bytes_to_buffer(self, buffer: int, user_data: bytes, + length: int) -> int: + """Write bytes to the allocated buffer.""" + return self.engine.writeBytesToBuffer(buffer, user_data, length) + + def read_bytes_from_buffer(self, buffer: int, length: int) -> bytes: + """Read bytes from the allocated buffer.""" + return self.engine.readBytesFromBuffer(buffer, length) + + def wait_for_ack(self, src_ptr: int, length: int) -> None: + """Asynchronously wait for ACK from the receiver.""" + ack = self.sender_ack.recv_pyobj() + if ack != b'ACK': + logger.error("Failed to receive ACK from the receiver") + + self.free_managed_buffer(src_ptr, length) + + def send_bytes(self, user_data: bytes) -> None: + """Send bytes to the remote process.""" + length = len(user_data) + src_ptr = self.allocate_managed_buffer(length) + self.write_bytes_to_buffer(src_ptr, user_data, length) + self.sender_socket.send_pyobj((src_ptr, length)) + self.buffer_cleaner.submit(self.wait_for_ack, src_ptr, length) + + def recv_bytes(self) -> bytes: + """Receive bytes from the remote process.""" + src_ptr, length = self.receiver_socket.recv_pyobj() + dst_ptr = self.allocate_managed_buffer(length) + self.transfer_sync(dst_ptr, src_ptr, length) + ret = self.read_bytes_from_buffer(dst_ptr, length) + + # Buffer cleanup + self.receiver_ack.send_pyobj(b'ACK') + self.free_managed_buffer(dst_ptr, length) + + return ret + + +class MooncakePipe(KVPipeBase): + """MooncakeTransferEngine based Pipe implementation.""" + + def __init__(self, + local_rank: int, + config: KVTransferConfig, + device: Optional[str] = None): + """Initialize the mooncake pipe and set related parameters.""" + self.config = config + self.local_rank = local_rank + self.kv_rank = self.config.kv_rank + if device is None: + self.device = self._select_device(self.config.kv_buffer_device) + else: + self.device = self._select_device(device) + + self.transfer_engine = MooncakeTransferEngine(self.kv_rank, + self.local_rank) + self.transport_thread: Optional[ThreadPoolExecutor] = None + self.none_tensor = torch.tensor([NONE_INT], device=self.device) + + def _select_device(self, device: str) -> torch.device: + """Select available device (CUDA or CPU).""" + logger.info("Selecting device: %s", device) + if device == "cuda": + return torch.device(f"cuda:{self.local_rank}") + else: + return torch.device("cpu") + + def tensor_hash(self, tensor: torch.Tensor) -> int: + """Calculate the hash value of the tensor.""" + return hash(tensor.data_ptr()) + + def _send_impl(self, tensor: torch.Tensor) -> None: + """Implement 
the tensor sending logic.""" + value_bytes = pickle.dumps(tensor) + self.transfer_engine.send_bytes(value_bytes) + + def _recv_impl(self) -> torch.Tensor: + """Implement the tensor receiving logic.""" + data = self.transfer_engine.recv_bytes() + return pickle.loads(data) + + def send_tensor(self, tensor: Optional[torch.Tensor]) -> None: + """Send tensor to the target process.""" + if self.transport_thread is None: + self.transport_thread = ThreadPoolExecutor(max_workers=1) + tensor = tensor if tensor is not None else self.none_tensor + assert (len(tensor.shape) > 0) + self.transport_thread.submit(self._send_impl, tensor) + + def recv_tensor(self) -> Optional[torch.Tensor]: + """Receive tensor from other processes.""" + if self.transport_thread is None: + self.transport_thread = ThreadPoolExecutor(max_workers=1) + tensor = self.transport_thread.submit(self._recv_impl).result() + if tensor.numel() == 1 and tensor.item() == NONE_INT: + return None + else: + return tensor + + def close(self) -> None: + """Cleanup logic when closing the pipe.""" + self.transfer_engine.sender_socket.close() + self.transfer_engine.receiver_socket.close() + self.transfer_engine.sender_ack.close() + self.transfer_engine.receiver_ack.close() + self.transfer_engine.context.term() # Terminate the ZMQ context + logger.info("Closed the transfer engine and cleaned up resources.") From 25ebed2f8ca6d747d63f2be9ede023c561851ac8 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 15 Dec 2024 13:33:00 -0800 Subject: [PATCH 07/72] [V1][Minor] Cache np arange to reduce input preparation overhead (#11214) Signed-off-by: Woosuk Kwon --- vllm/v1/worker/gpu_model_runner.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index abcd4b007a326..67166fb05085c 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -118,6 +118,12 @@ def __init__( dtype=self.dtype, device=self.device) + # OPTIMIZATION: Cache the tensors rather than creating them every step. + self.arange_np = np.arange(max(self.max_num_reqs, self.max_model_len), + dtype=np.int32) + # NOTE(woosuk): These tensors are "stateless", i.e., they are literally + # a faster version of creating a new tensor every time. Thus, we should + # not make any assumptions about the values in these tensors. self.input_ids_cpu = torch.zeros(self.max_num_tokens, dtype=torch.int32, device="cpu", @@ -269,11 +275,13 @@ def _prepare_inputs(self, scheduler_output: "SchedulerOutput"): # Get request indices. # E.g., [2, 5, 3] -> [0, 0, 1, 1, 1, 1, 1, 2, 2, 2] - req_indices = np.repeat(np.arange(num_reqs), num_scheduled_tokens) + req_indices = np.repeat(self.arange_np[:num_reqs], + num_scheduled_tokens) # Get batched arange. # E.g., [2, 5, 3] -> [0, 1, 0, 1, 2, 3, 4, 0, 1, 2] - arange = np.concatenate([np.arange(n) for n in num_scheduled_tokens]) + arange = np.concatenate( + [self.arange_np[:n] for n in num_scheduled_tokens]) # Get positions. 
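For reference, the JSON file that MooncakeTransferEngineConfig.from_file reads via MOONCAKE_CONFIG_PATH in the patch above is only described implicitly, so the following is a minimal sketch of one. The hostnames, ports, file path, and the choice of an etcd metadata backend are placeholder assumptions rather than values taken from this patch, and a real deployment may need further KVTransferConfig fields (for example kv_rank and kv_buffer_size).

    import json
    import os

    from vllm.config import KVTransferConfig

    # Placeholder endpoints; per the patch, the prefill (kv_producer) side is
    # kv_rank 0 and uses prefill_url, the decode side uses decode_url.
    mooncake_config = {
        "prefill_url": "192.168.0.1:13003",
        "decode_url": "192.168.0.2:13003",
        "metadata_server": "192.168.0.1:2379",  # assumed etcd endpoint
        "metadata_backend": "etcd",             # optional: "etcd" or "redis"
        "protocol": "tcp",                      # defaults to "tcp" if omitted
        "device_name": "",                      # RDMA device name; empty here
    }
    with open("/tmp/mooncake_config.json", "w") as f:
        json.dump(mooncake_config, f)
    os.environ["MOONCAKE_CONFIG_PATH"] = "/tmp/mooncake_config.json"

    # The connector itself is selected through the KVTransferConfig validated
    # in vllm/config.py above, e.g. on the prefill instance:
    kv_cfg = KVTransferConfig.from_cli(
        '{"kv_connector": "MooncakeConnector", "kv_role": "kv_producer"}')

The decode instance would point at the same config file but pass kv_role as kv_consumer.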
positions_np = self.positions_np[:total_num_scheduled_tokens] From da6f40924609e084ced486cae5b4ddf97133acd9 Mon Sep 17 00:00:00 2001 From: AlexHe99 Date: Mon, 16 Dec 2024 08:33:58 +0800 Subject: [PATCH 08/72] Update deploying_with_k8s.rst (#10922) --- docs/source/serving/deploying_with_k8s.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/serving/deploying_with_k8s.rst b/docs/source/serving/deploying_with_k8s.rst index 7dc076dc709df..cc3606f0df851 100644 --- a/docs/source/serving/deploying_with_k8s.rst +++ b/docs/source/serving/deploying_with_k8s.rst @@ -162,7 +162,7 @@ To test the deployment, run the following ``curl`` command: curl http://mistral-7b.default.svc.cluster.local/v1/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "facebook/opt-125m", + "model": "mistralai/Mistral-7B-Instruct-v0.3", "prompt": "San Francisco is a", "max_tokens": 7, "temperature": 0 @@ -172,4 +172,4 @@ If the service is correctly deployed, you should receive a response from the vLL Conclusion ---------- -Deploying vLLM with Kubernetes allows for efficient scaling and management of ML models leveraging GPU resources. By following the steps outlined above, you should be able to set up and test a vLLM deployment within your Kubernetes cluster. If you encounter any issues or have suggestions, please feel free to contribute to the documentation. \ No newline at end of file +Deploying vLLM with Kubernetes allows for efficient scaling and management of ML models leveraging GPU resources. By following the steps outlined above, you should be able to set up and test a vLLM deployment within your Kubernetes cluster. If you encounter any issues or have suggestions, please feel free to contribute to the documentation. From 69ba344de8683ec4d3d42d11ae4e147a2a302da8 Mon Sep 17 00:00:00 2001 From: chenqianfzh <51831990+chenqianfzh@users.noreply.github.com> Date: Sun, 15 Dec 2024 16:38:40 -0800 Subject: [PATCH 09/72] [Bugfix] Fix block size validation (#10938) --- vllm/engine/arg_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 5a73c6ee02e0c..0aa367a173b6c 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -425,7 +425,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: parser.add_argument('--block-size', type=int, default=EngineArgs.block_size, - choices=[8, 16, 32, 64, 128], + choices=[8, 16, 32], help='Token block size for contiguous chunks of ' 'tokens. 
This is ignored on neuron devices and ' 'set to max-model-len') From 17138af7c45eba3aba3e9b84a3852b4ba81e460f Mon Sep 17 00:00:00 2001 From: yansh97 Date: Mon, 16 Dec 2024 16:15:40 +0800 Subject: [PATCH 10/72] [Bugfix] Fix the default value for temperature in ChatCompletionRequest (#11219) --- vllm/entrypoints/openai/protocol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index dfb7c977dbd43..6ed7c2e9dcd6b 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -211,7 +211,7 @@ class ChatCompletionRequest(OpenAIBaseModel): stop: Optional[Union[str, List[str]]] = Field(default_factory=list) stream: Optional[bool] = False stream_options: Optional[StreamOptions] = None - temperature: Optional[float] = 0.7 + temperature: Optional[float] = 1.0 top_p: Optional[float] = 1.0 tools: Optional[List[ChatCompletionToolsParam]] = None tool_choice: Optional[Union[Literal["none"], Literal["auto"], From b3b1526f03906c935e6ef80a2cdc971a65fdf7e2 Mon Sep 17 00:00:00 2001 From: cennn <61925104+cennn@users.noreply.github.com> Date: Mon, 16 Dec 2024 17:20:49 +0800 Subject: [PATCH 11/72] WIP: [CI/Build] simplify Dockerfile build for ARM64 / GH200 (#11212) Signed-off-by: drikster80 Co-authored-by: drikster80 --- Dockerfile | 40 +++++++++++++++---- docs/source/serving/deploying_with_docker.rst | 26 ++++++++++++ requirements-build.txt | 2 +- requirements-cuda-arm64.txt | 3 ++ requirements-cuda.txt | 4 +- 5 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 requirements-cuda-arm64.txt diff --git a/Dockerfile b/Dockerfile index c1b6e1bbfe354..123703848749c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,7 @@ ARG CUDA_VERSION=12.4.1 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base ARG CUDA_VERSION=12.4.1 ARG PYTHON_VERSION=3.12 +ARG TARGETPLATFORM ENV DEBIAN_FRONTEND=noninteractive # Install Python and other dependencies @@ -46,9 +47,14 @@ WORKDIR /workspace # install build and runtime dependencies COPY requirements-common.txt requirements-common.txt COPY requirements-cuda.txt requirements-cuda.txt +COPY requirements-cuda-arm64.txt requirements-cuda-arm64.txt RUN --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install -r requirements-cuda.txt +RUN --mount=type=cache,target=/root/.cache/pip \ + if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ + python3 -m pip install -r requirements-cuda-arm64.txt; \ + fi # cuda arch list used by torch # can be useful for both `dev` and `test` @@ -63,6 +69,7 @@ ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches} #################### WHEEL BUILD IMAGE #################### FROM base AS build +ARG TARGETPLATFORM # install build dependencies COPY requirements-build.txt requirements-build.txt @@ -70,6 +77,11 @@ COPY requirements-build.txt requirements-build.txt RUN --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install -r requirements-build.txt +RUN --mount=type=cache,target=/root/.cache/pip \ + if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ + python3 -m pip install -r requirements-cuda-arm64.txt; \ + fi + COPY . . 
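The effect of the ChatCompletionRequest change in the patch above can be sanity-checked in isolation; the snippet below is a sketch that assumes model and messages are the only required fields, and the model name is a placeholder.

    from vllm.entrypoints.openai.protocol import ChatCompletionRequest

    # A request that omits "temperature" should now default to 1.0 (matching
    # the OpenAI API) rather than the previous 0.7.
    req = ChatCompletionRequest(
        model="facebook/opt-125m",  # placeholder model name
        messages=[{"role": "user", "content": "Hello"}],
    )
    assert req.temperature == 1.0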
ARG GIT_REPO_CHECK=0 RUN --mount=type=bind,source=.git,target=.git \ @@ -134,8 +146,8 @@ COPY requirements-test.txt requirements-test.txt COPY requirements-dev.txt requirements-dev.txt RUN --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install -r requirements-dev.txt - #################### DEV IMAGE #################### + #################### vLLM installation IMAGE #################### # image with vLLM installed FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base @@ -143,6 +155,9 @@ ARG CUDA_VERSION=12.4.1 ARG PYTHON_VERSION=3.12 WORKDIR /vllm-workspace ENV DEBIAN_FRONTEND=noninteractive +ARG TARGETPLATFORM + +COPY requirements-cuda-arm64.txt requirements-cuda-arm64.txt RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \ echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment @@ -168,18 +183,25 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ # or future versions of triton. RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ -# install vllm wheel first, so that torch etc will be installed +# Install vllm wheel first, so that torch etc will be installed. RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install dist/*.whl --verbose RUN --mount=type=cache,target=/root/.cache/pip \ - . /etc/environment && \ - python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp${PYTHON_VERSION_STR}-cp${PYTHON_VERSION_STR}-linux_x86_64.whl + if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ + pip uninstall -y torch && \ + python3 -m pip install -r requirements-cuda-arm64.txt; \ + fi + +RUN --mount=type=cache,target=/root/.cache/pip \ +. 
/etc/environment && \ +if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \ + python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp${PYTHON_VERSION_STR}-cp${PYTHON_VERSION_STR}-linux_x86_64.whl; \ +fi COPY examples examples #################### vLLM installation IMAGE #################### - #################### TEST IMAGE #################### # image to run unit testing suite # note that this uses vllm installed by `pip` @@ -209,7 +231,6 @@ COPY vllm/v1 /usr/local/lib/python3.12/dist-packages/vllm/v1 RUN mkdir test_docs RUN mv docs test_docs/ RUN mv vllm test_docs/ - #################### TEST IMAGE #################### #################### OPENAI API SERVER #################### @@ -218,8 +239,11 @@ FROM vllm-base AS vllm-openai # install additional dependencies for openai api server RUN --mount=type=cache,target=/root/.cache/pip \ - pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' timm==0.9.10 - + if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ + pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10'; \ + else \ + pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' 'timm==0.9.10'; \ + fi ENV VLLM_USAGE_SOURCE production-docker-image ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst index 14d94b09e9b9c..11a9f12fd17cd 100644 --- a/docs/source/serving/deploying_with_docker.rst +++ b/docs/source/serving/deploying_with_docker.rst @@ -37,6 +37,32 @@ You can build and run vLLM from source via the provided `Dockerfile =61 setuptools-scm>=8 -torch==2.5.1 +torch==2.5.1; platform_machine != 'aarch64' wheel jinja2 diff --git a/requirements-cuda-arm64.txt b/requirements-cuda-arm64.txt new file mode 100644 index 0000000000000..bbcb5cb7012ce --- /dev/null +++ b/requirements-cuda-arm64.txt @@ -0,0 +1,3 @@ +--index-url https://download.pytorch.org/whl/nightly/cu124 +torchvision==0.22.0.dev20241215; platform_machine == 'aarch64' +torch==2.6.0.dev20241210+cu124; platform_machine == 'aarch64' diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 058ab7c1ee9df..5d4dee8c7129a 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -4,7 +4,7 @@ # Dependencies for NVIDIA GPUs ray >= 2.9 nvidia-ml-py >= 12.560.30 # for pynvml package -torch == 2.5.1 +torch == 2.5.1; platform_machine != 'aarch64' # These must be updated alongside torch -torchvision == 0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version +torchvision == 0.20.1; platform_machine != 'aarch64' # Required for phi3v processor. 
See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version xformers == 0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1 From bddbbcb132429084ede62855bcd6a1023a3645c1 Mon Sep 17 00:00:00 2001 From: Jani Monoses Date: Mon, 16 Dec 2024 11:56:19 +0200 Subject: [PATCH 12/72] [Model] Support Cohere2ForCausalLM (Cohere R7B) (#11203) --- docs/source/models/supported_models.rst | 4 ++-- tests/models/registry.py | 2 ++ tests/models/test_initialization.py | 4 ++++ vllm/model_executor/models/commandr.py | 19 +++++++++++++++++-- vllm/model_executor/models/registry.py | 1 + 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst index cae4a88de1638..3bef3f3226062 100644 --- a/docs/source/models/supported_models.rst +++ b/docs/source/models/supported_models.rst @@ -118,9 +118,9 @@ Text Generation (``--task generate``) - :code:`THUDM/chatglm2-6b`, :code:`THUDM/chatglm3-6b`, etc. - ✅︎ - ✅︎ - * - :code:`CohereForCausalLM` + * - :code:`CohereForCausalLM`,:code:`Cohere2ForCausalLM` - Command-R - - :code:`CohereForAI/c4ai-command-r-v01`, etc. + - :code:`CohereForAI/c4ai-command-r-v01`, :code:`CohereForAI/c4ai-command-r7b-12-2024`, etc. - ✅︎ - ✅︎ * - :code:`DbrxForCausalLM` diff --git a/tests/models/registry.py b/tests/models/registry.py index 6a8b1742ceae3..fac8c4b2e9b19 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -53,6 +53,8 @@ class _HfExamplesInfo: # ChatGLMModel supports multimodal "CohereForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r-v01", trust_remote_code=True), + "Cohere2ForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r7b-12-2024", # noqa: E501 + trust_remote_code=True), "DbrxForCausalLM": _HfExamplesInfo("databricks/dbrx-instruct"), "DeciLMForCausalLM": _HfExamplesInfo("Deci/DeciLM-7B-instruct", trust_remote_code=True), diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py index 3b728f2744fca..a4eea7f035c91 100644 --- a/tests/models/test_initialization.py +++ b/tests/models/test_initialization.py @@ -1,6 +1,7 @@ from unittest.mock import patch import pytest +import transformers from transformers import PretrainedConfig from vllm import LLM @@ -11,6 +12,9 @@ @pytest.mark.parametrize("model_arch", HF_EXAMPLE_MODELS.get_supported_archs()) def test_can_initialize(model_arch): model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch) + if (model_arch == "Cohere2ForCausalLM" + and transformers.__version__ < "4.48.0"): + pytest.skip(reason="Model introduced in HF >= 4.48.0") if not model_info.is_available_online: pytest.skip("Model is not available online") diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py index 85e24ca660686..c846e42f1b0c3 100644 --- a/vllm/model_executor/models/commandr.py +++ b/vllm/model_executor/models/commandr.py @@ -48,7 +48,7 @@ from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP -from .utils import (is_pp_missing_parameter, +from .utils import (extract_layer_index, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) @@ -171,12 +171,26 @@ def __init__( rope_scaling=self.rope_scaling, is_neox_style=False, ) + + sliding_window = getattr(config, "sliding_window", None) + # Model v2 has sliding windows, v1 does not + self.v1 = sliding_window is None + + layer_idx = extract_layer_index(prefix) + 
layer_has_sliding_window = ( + getattr(config, "sliding_window_pattern", False) + and (layer_idx + 1) % self.config.sliding_window_pattern != 0) + + self.sliding_window = (sliding_window + if layer_has_sliding_window else None) + self.attn = Attention(self.num_heads, self.head_dim, self.scaling, num_kv_heads=self.num_kv_heads, cache_config=cache_config, quant_config=quant_config, + per_layer_sliding_window=self.sliding_window, prefix=f"{prefix}.attn") if self.use_qk_norm: self.q_norm = LayerNorm(param_shape=(self.num_heads, @@ -206,7 +220,8 @@ def forward( q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) if self.use_qk_norm: q, k = self._apply_qk_norm(q, k) - q, k = self.rotary_emb(positions, q, k) + if self.v1 or self.sliding_window: + q, k = self.rotary_emb(positions, q, k) attn_output = self.attn(q, k, v, kv_cache, attn_metadata) output, _ = self.o_proj(attn_output) return output diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 4e77746f312e3..68a2467a813a1 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -41,6 +41,7 @@ "BloomForCausalLM": ("bloom", "BloomForCausalLM"), # ChatGLMModel supports multimodal "CohereForCausalLM": ("commandr", "CohereForCausalLM"), + "Cohere2ForCausalLM": ("commandr", "CohereForCausalLM"), "DbrxForCausalLM": ("dbrx", "DbrxForCausalLM"), "DeciLMForCausalLM": ("decilm", "DeciLMForCausalLM"), "DeepseekForCausalLM": ("deepseek", "DeepseekForCausalLM"), From d927dbcd889fb2476cb61ea477ff51e5dd9e1ae3 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Mon, 16 Dec 2024 18:09:53 +0800 Subject: [PATCH 13/72] [Model] Refactor Ultravox to use merged input processor (#11198) Signed-off-by: Isotr0py <2037008807@qq.com> Co-authored-by: Cyrus Leung --- examples/offline_inference_audio_language.py | 10 +- tests/distributed/test_pipeline_parallel.py | 2 +- tests/entrypoints/openai/test_audio.py | 1 + .../audio_language/test_ultravox.py | 5 +- vllm/entrypoints/chat_utils.py | 2 +- vllm/model_executor/models/ultravox.py | 244 ++++++++---------- vllm/multimodal/processing.py | 19 +- 7 files changed, 129 insertions(+), 154 deletions(-) diff --git a/examples/offline_inference_audio_language.py b/examples/offline_inference_audio_language.py index 050b791b62adb..68b786961b14a 100644 --- a/examples/offline_inference_audio_language.py +++ b/examples/offline_inference_audio_language.py @@ -25,16 +25,16 @@ def run_ultravox(question: str, audio_count: int): tokenizer = AutoTokenizer.from_pretrained(model_name) messages = [{ - 'role': - 'user', - 'content': - "<|reserved_special_token_0|>\n" * audio_count + question + 'role': 'user', + 'content': "<|audio|>\n" * audio_count + question }] prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - llm = LLM(model=model_name, limit_mm_per_prompt={"audio": audio_count}) + llm = LLM(model=model_name, + trust_remote_code=True, + limit_mm_per_prompt={"audio": audio_count}) stop_token_ids = None return llm, prompt, stop_token_ids diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index 85d408efafe96..ddbf40f089407 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -214,7 +214,7 @@ def iter_params(self, model_name: str): "Qwen/Qwen-VL-Chat": PPTestSettings.fast(trust_remote_code=True), "Qwen/Qwen2-Audio-7B-Instruct": PPTestSettings.fast(), "Qwen/Qwen2-VL-2B-Instruct": PPTestSettings.fast(), 
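To make the sliding-window selection added to commandr.py above concrete, the snippet below replays the (layer_idx + 1) % sliding_window_pattern check outside the model; the pattern value of 4 and window of 4096 are illustrative assumptions, not values read from any model config.

    # Hypothetical config values, for illustration only.
    sliding_window = 4096
    sliding_window_pattern = 4

    def uses_sliding_window(layer_idx: int) -> bool:
        # Mirrors the check in CohereAttention: every layer except each
        # sliding_window_pattern-th one gets a per-layer sliding window.
        return (layer_idx + 1) % sliding_window_pattern != 0

    # Layers 0-2 attend within the window (and apply rotary embeddings, per
    # the forward() change above); layer 3 uses full attention, and so on.
    print([uses_sliding_window(i) for i in range(8)])
    # -> [True, True, True, False, True, True, True, False]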
- "fixie-ai/ultravox-v0_3": PPTestSettings.fast(), + "fixie-ai/ultravox-v0_3": PPTestSettings.fast(trust_remote_code=True), # [Encoder-decoder] # TODO: Implement PP # "meta-llama/Llama-3.2-11B-Vision-Instruct": PPTestSettings.fast(), diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py index a74109e2f5120..b579dcbb5c402 100644 --- a/tests/entrypoints/openai/test_audio.py +++ b/tests/entrypoints/openai/test_audio.py @@ -25,6 +25,7 @@ def server(): "--max-num-seqs", "5", "--enforce-eager", + "--trust-remote-code", ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: diff --git a/tests/models/decoder_only/audio_language/test_ultravox.py b/tests/models/decoder_only/audio_language/test_ultravox.py index e100c6b9bb906..c548cfdf53414 100644 --- a/tests/models/decoder_only/audio_language/test_ultravox.py +++ b/tests/models/decoder_only/audio_language/test_ultravox.py @@ -16,7 +16,7 @@ AudioTuple = Tuple[np.ndarray, int] -VLLM_PLACEHOLDER = "<|reserved_special_token_0|>" +VLLM_PLACEHOLDER = "<|audio|>" HF_PLACEHOLDER = "<|audio|>" CHUNKED_PREFILL_KWARGS = { @@ -46,7 +46,8 @@ def audio(request): def server(request, audio_assets): args = [ "--dtype=bfloat16", "--max-model-len=4096", "--enforce-eager", - f"--limit-mm-per-prompt=audio={len(audio_assets)}" + f"--limit-mm-per-prompt=audio={len(audio_assets)}", + "--trust-remote-code" ] + [ f"--{key.replace('_','-')}={value}" for key, value in request.param.items() diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index c2054dcbfce0e..aaa5cd759366a 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -418,7 +418,7 @@ def _placeholder_str(self, modality: ModalityStr, raise TypeError(f"Unknown {modality} model type: {model_type}") elif modality == "audio": if model_type == "ultravox": - return "<|reserved_special_token_0|>" + return "<|audio|>" if model_type == "qwen2_audio": return (f"Audio {current_count}: " f"<|audio_bos|><|AUDIO|><|audio_eos|>") diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py index ea1e5401d42c0..ebaa8a4c4f38a 100644 --- a/vllm/model_executor/models/ultravox.py +++ b/vllm/model_executor/models/ultravox.py @@ -3,41 +3,39 @@ import math from functools import cached_property, lru_cache -from typing import (Iterable, List, Literal, Mapping, Optional, Set, Tuple, - TypedDict, Union, cast) +from typing import (Any, Dict, Iterable, List, Literal, Mapping, Optional, Set, + Tuple, TypedDict, Union) import numpy as np import torch import torch.utils.checkpoint from torch import nn from torch.nn import functional as F +from transformers import BatchFeature from transformers.models.whisper import WhisperFeatureExtractor from transformers.models.whisper.modeling_whisper import WhisperEncoder from vllm.attention import AttentionMetadata from vllm.config import VllmConfig -from vllm.inputs import (INPUT_REGISTRY, DecoderOnlyInputs, DummyData, - InputContext, token_inputs) +from vllm.inputs import InputContext from vllm.model_executor.layers.activation import SiluAndMul, get_act_fn from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler from vllm.model_executor.model_loader.loader import DefaultModelLoader from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.multimodal import (MULTIMODAL_REGISTRY, MultiModalKwargs, - NestedTensors) -from vllm.multimodal.utils import (cached_get_tokenizer, - 
consecutive_placeholder_ranges, - repeat_and_pad_placeholder_tokens) -from vllm.sequence import IntermediateTensors, SequenceData +from vllm.multimodal import MULTIMODAL_REGISTRY, NestedTensors +from vllm.multimodal.processing import (BaseMultiModalProcessor, + MultiModalDataDict, + MultiModalDataItems, ProcessorInputs, + PromptReplacement) +from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs.ultravox import UltravoxConfig -from vllm.utils import is_list_of from .interfaces import SupportsMultiModal, SupportsPP from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, init_vllm_registered_model, maybe_prefix, merge_multimodal_embeddings_from_map) -_AUDIO_PLACEHOLDER_TOKEN = 128002 _AUDIO_TOKENS_PER_SECOND = 6.25 @@ -72,64 +70,18 @@ def get_ultravox_max_audio_tokens(ctx: InputContext): return math.ceil(feature_extractor.chunk_length * _AUDIO_TOKENS_PER_SECOND) -def dummy_seq_data_for_ultravox( - ctx: InputContext, - seq_len: int, - audio_count: int, -): - audio_length = min(get_ultravox_max_audio_tokens(ctx), - seq_len // audio_count) +class UltravoxMultiModalProcessor(BaseMultiModalProcessor): - return SequenceData.from_prompt_token_counts( - (_AUDIO_PLACEHOLDER_TOKEN, audio_length * audio_count), - (0, seq_len - audio_length * audio_count)), { - "audio": - consecutive_placeholder_ranges(num_items=audio_count, - item_size=audio_length) - } - - -def dummy_audio_for_ultravox( - ctx: InputContext, - audio_count: int, -): - feature_extractor = whisper_feature_extractor(ctx) - audio_and_sr = (np.array([0.0] * feature_extractor.chunk_length), 1) - return {"audio": [audio_and_sr] * audio_count} - - -def dummy_data_for_ultravox( - ctx: InputContext, - seq_len: int, - mm_counts: Mapping[str, int], -): - audio_count = mm_counts["audio"] - seq_data, ranges = dummy_seq_data_for_ultravox(ctx, seq_len, audio_count) - mm_dict = dummy_audio_for_ultravox(ctx, audio_count) - - return DummyData(seq_data, mm_dict, ranges) - - -def input_mapper_for_ultravox(ctx: InputContext, data: object): - if not isinstance(data, list): - data = [data] - - if len(data) == 0: - return MultiModalKwargs() - - # If the audio inputs are embeddings, no need for preprocessing - if is_list_of(data, torch.Tensor, check="all"): - return MultiModalKwargs({"audio_embeds": data}) - - audio_features = [] - for audio_input in data: - if not isinstance(audio_input, tuple): - raise NotImplementedError( - f"Unsupported data type: {type(audio_input)}") - - (audio, sr) = cast(Tuple[np.ndarray, Union[float, int]], audio_input) - feature_extractor = whisper_feature_extractor(ctx) + def _get_feature_extractor(self) -> WhisperFeatureExtractor: + return self._get_hf_processor().audio_processor.feature_extractor + def _resample_audio( + self, + audio: np.ndarray, + sr: int, + ) -> Dict[str, Union[np.ndarray, int]]: + # resample audio to the model's sampling rate + feature_extractor = self._get_feature_extractor() if sr != feature_extractor.sampling_rate: try: import librosa @@ -140,78 +92,92 @@ def input_mapper_for_ultravox(ctx: InputContext, data: object): orig_sr=sr, target_sr=feature_extractor.sampling_rate) sr = feature_extractor.sampling_rate + return {"audio": audio, "sampling_rate": sr} - minimum_audio_length = feature_extractor.n_fft // 2 + 1 - if len(audio) < minimum_audio_length: - # Not enough audio; pad it. 
- audio = np.pad(audio, (0, minimum_audio_length - len(audio))) - - single_audio_features = feature_extractor( - audio, sampling_rate=sr, padding="longest", - return_tensors="pt")["input_features"] - - # Remove the batch dimension because we're wrapping it in a list. - audio_features.append(single_audio_features.squeeze(0)) - - return MultiModalKwargs({"audio_features": audio_features}) - - -def input_processor_for_ultravox(ctx: InputContext, inputs: DecoderOnlyInputs): - multi_modal_data = inputs.get("multi_modal_data") - if multi_modal_data is None or "audio" not in multi_modal_data: - return inputs + def _apply_hf_processor( + self, + prompt: str, + mm_data: MultiModalDataDict, + mm_processor_kwargs: Mapping[str, object], + ) -> BatchFeature: + if not mm_data or not mm_data.get("audio", None): + return super()._apply_hf_processor(prompt, mm_data, + mm_processor_kwargs) + + audio_data = mm_data["audio"] + if not isinstance(audio_data, list): + audio_data = [audio_data] + + # Ultravox processor doesn't support multiple inputs, + # therefore we need to input text and audio one by one + tokenizer = self._get_tokenizer() + audio_features, audio_token_len = [], [] + processed_inputs = {} + for audio, sr in audio_data: + data = self._resample_audio(audio, sr) + processed_inputs = super()._apply_hf_processor( + prompt, data, mm_processor_kwargs) + prompt = tokenizer.decode(processed_inputs["input_ids"][0], + skip_special_tokens=False) + audio_features.append( + processed_inputs.pop("audio_values").squeeze(0)) + audio_token_len.append( + processed_inputs.pop("audio_token_len").item()) + + return dict( + **processed_inputs, + audio_features=audio_features, + audio_token_len=audio_token_len, + ) - if "multi_modal_placeholders" in inputs and "audio" in inputs[ - "multi_modal_placeholders"]: - # The inputs already have placeholders. - return inputs + def _get_processor_data( + self, + mm_data: MultiModalDataDict, + ) -> Tuple[Dict[str, Any], Dict[str, Any]]: + # Ultravox uses "audio" instead of "audios" as calling keyword + processor_data, passthrough_data = super()._get_processor_data(mm_data) + if "audios" in processor_data: + processor_data["audio"] = processor_data.pop("audios") + return processor_data, passthrough_data + + def _get_prompt_replacements( + self, + mm_items: MultiModalDataItems, + hf_inputs: BatchFeature, + mm_processor_kwargs: Mapping[str, object], + ) -> list[PromptReplacement]: + hf_processor = self._get_hf_processor() + placeholder = hf_processor.audio_token_replacement + + def get_replacement_ultravox(item_idx: int): + audio_token_len = hf_inputs["audio_token_len"][item_idx] + return placeholder * audio_token_len + + return [ + PromptReplacement( + modality="audio", + target="<|audio|>", + replacement=get_replacement_ultravox, + ) + ] - feature_extractor = whisper_feature_extractor(ctx) - audios = multi_modal_data["audio"] - if not isinstance(audios, list): - audios = [audios] - - audio_token_counts = [] - for audio in audios: - if isinstance(audio, torch.Tensor): - audio_num_tokens = audio.shape[1] - audio_token_counts.append(audio_num_tokens) - else: - audio_data, sample_rate = audio - audio_length = audio_data.shape[0] - if sample_rate != feature_extractor.sampling_rate: - # Account for resampling. 
- adjustment = feature_extractor.sampling_rate / sample_rate - audio_length = math.ceil(adjustment * audio_length) - - feature_extractor_output_length = math.ceil( - (audio_length - (feature_extractor.hop_length - 1)) / - feature_extractor.hop_length) - - uv_config = ctx.get_hf_config(UltravoxConfig) - audio_num_tokens = min( - max( - 1, - math.ceil(feature_extractor_output_length / - (uv_config.stack_factor * 2))), - get_ultravox_max_audio_tokens(ctx)) - audio_token_counts.append(audio_num_tokens) - - tokenizer = cached_get_tokenizer(ctx.model_config.tokenizer) - - new_prompt, new_token_ids, ranges = repeat_and_pad_placeholder_tokens( - tokenizer, - inputs.get("prompt"), - inputs["prompt_token_ids"], - placeholder_token_id=_AUDIO_PLACEHOLDER_TOKEN, - repeat_count=audio_token_counts, - ) - - # NOTE: Create a defensive copy of the original inputs - return token_inputs(prompt_token_ids=new_token_ids, - prompt=new_prompt, - multi_modal_data=multi_modal_data, - multi_modal_placeholders={"audio": ranges}) + def _get_dummy_mm_inputs( + self, + mm_counts: Mapping[str, int], + ) -> ProcessorInputs: + feature_extractor = self._get_feature_extractor() + sampling_rate = feature_extractor.sampling_rate + audio_len = feature_extractor.chunk_length * sampling_rate + + audio_count = mm_counts["audio"] + audio = np.zeros(audio_len) + data = {"audio": [(audio, sampling_rate)] * audio_count} + + return ProcessorInputs( + prompt_text="<|audio|>" * audio_count, + mm_data=data, + mm_processor_kwargs={}, + ) class StackAudioFrames(nn.Module): @@ -332,11 +298,9 @@ def forward( return hidden_states -@MULTIMODAL_REGISTRY.register_input_mapper("audio", input_mapper_for_ultravox) @MULTIMODAL_REGISTRY.register_max_multimodal_tokens( "audio", get_ultravox_max_audio_tokens) -@INPUT_REGISTRY.register_dummy_data(dummy_data_for_ultravox) -@INPUT_REGISTRY.register_input_processor(input_processor_for_ultravox) +@MULTIMODAL_REGISTRY.register_processor(UltravoxMultiModalProcessor) class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index ce6bec1d49aac..339e193eefe20 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -594,14 +594,10 @@ def _find_placeholders( return list( iter_placeholders(all_prompt_repls, new_token_ids, mm_item_counts)) - def _apply_hf_processor( + def _get_processor_data( self, - prompt: str, mm_data: MultiModalDataDict, - mm_processor_kwargs: Mapping[str, object], ) -> BatchFeature: - hf_processor = self._get_hf_processor(**mm_processor_kwargs) - processor_data = dict[str, Any]() passthrough_data = dict[str, Any]() for k, v in mm_data.items(): @@ -619,6 +615,19 @@ def _apply_hf_processor( processor_data[f"{k}s"] = v else: processor_data[k] = v + return processor_data, passthrough_data + + def _apply_hf_processor( + self, + prompt: str, + mm_data: MultiModalDataDict, + mm_processor_kwargs: Mapping[str, object], + ) -> BatchFeature: + # some mm_processor_kwargs may be used in processor initialization + # instead of processor call + hf_processor = self._get_hf_processor(**mm_processor_kwargs) + + processor_data, passthrough_data = self._get_processor_data(mm_data) assert callable(hf_processor) mm_processor_kwargs = self.ctx.resolve_hf_processor_call_kwargs( From 2ca830dbaa1a7c30b8ff4d7c860c63f87dc18be3 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Mon, 16 Dec 2024 19:23:33 +0800 Subject: [PATCH 14/72] [Doc] Reorder vision 
language examples in alphabet order (#11228) Signed-off-by: Isotr0py <2037008807@qq.com> --- examples/offline_inference_vision_language.py | 486 +++++++++--------- ...e_inference_vision_language_multi_image.py | 288 +++++------ 2 files changed, 387 insertions(+), 387 deletions(-) diff --git a/examples/offline_inference_vision_language.py b/examples/offline_inference_vision_language.py index 7bc43242b717e..6d0495fdd4054 100644 --- a/examples/offline_inference_vision_language.py +++ b/examples/offline_inference_vision_language.py @@ -19,6 +19,159 @@ # Unless specified, these settings have been tested to work on a single L4. +# Aria +def run_aria(question: str, modality: str): + assert modality == "image" + model_name = "rhymes-ai/Aria" + + llm = LLM(model=model_name, + tokenizer_mode="slow", + trust_remote_code=True, + dtype="bfloat16", + mm_cache_preprocessor=args.mm_cache_preprocessor) + + prompt = (f"<|im_start|>user\n<|img|>\n{question}" + "<|im_end|>\n<|im_start|>assistant\n") + + stop_token_ids = [93532, 93653, 944, 93421, 1019, 93653, 93519] + return llm, prompt, stop_token_ids + + +# BLIP-2 +def run_blip2(question: str, modality: str): + assert modality == "image" + + # BLIP-2 prompt format is inaccurate on HuggingFace model repository. + # See https://huggingface.co/Salesforce/blip2-opt-2.7b/discussions/15#64ff02f3f8cf9e4f5b038262 #noqa + prompt = f"Question: {question} Answer:" + llm = LLM(model="Salesforce/blip2-opt-2.7b", + mm_cache_preprocessor=args.mm_cache_preprocessor) + stop_token_ids = None + return llm, prompt, stop_token_ids + + +# Chameleon +def run_chameleon(question: str, modality: str): + assert modality == "image" + + prompt = f"{question}" + llm = LLM(model="facebook/chameleon-7b", + max_model_len=4096, + mm_cache_preprocessor=args.mm_cache_preprocessor) + stop_token_ids = None + return llm, prompt, stop_token_ids + + +# Fuyu +def run_fuyu(question: str, modality: str): + assert modality == "image" + + prompt = f"{question}\n" + llm = LLM(model="adept/fuyu-8b", + max_model_len=2048, + max_num_seqs=2, + mm_cache_preprocessor=args.mm_cache_preprocessor) + stop_token_ids = None + return llm, prompt, stop_token_ids + + +# GLM-4v +def run_glm4v(question: str, modality: str): + assert modality == "image" + model_name = "THUDM/glm-4v-9b" + + llm = LLM(model=model_name, + max_model_len=2048, + max_num_seqs=2, + trust_remote_code=True, + enforce_eager=True, + mm_cache_preprocessor=args.mm_cache_preprocessor) + prompt = question + stop_token_ids = [151329, 151336, 151338] + return llm, prompt, stop_token_ids + + +# H2OVL-Mississippi +def run_h2ovl(question: str, modality: str): + assert modality == "image" + + model_name = "h2oai/h2ovl-mississippi-2b" + + llm = LLM( + model=model_name, + trust_remote_code=True, + max_model_len=8192, + mm_cache_preprocessor=args.mm_cache_preprocessor, + ) + + tokenizer = AutoTokenizer.from_pretrained(model_name, + trust_remote_code=True) + messages = [{'role': 'user', 'content': f"\n{question}"}] + prompt = tokenizer.apply_chat_template(messages, + tokenize=False, + add_generation_prompt=True) + + # Stop tokens for H2OVL-Mississippi + # https://huggingface.co/h2oai/h2ovl-mississippi-2b + stop_token_ids = [tokenizer.eos_token_id] + return llm, prompt, stop_token_ids + + +# Idefics3-8B-Llama3 +def run_idefics3(question: str, modality: str): + assert modality == "image" + model_name = "HuggingFaceM4/Idefics3-8B-Llama3" + + llm = LLM( + model=model_name, + max_model_len=8192, + max_num_seqs=2, + enforce_eager=True, + # if you are running out of 
memory, you can reduce the "longest_edge". + # see: https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3#model-optimizations + mm_processor_kwargs={ + "size": { + "longest_edge": 3 * 364 + }, + }, + mm_cache_preprocessor=args.mm_cache_preprocessor, + ) + prompt = ( + f"<|begin_of_text|>User:{question}\nAssistant:" + ) + stop_token_ids = None + return llm, prompt, stop_token_ids + + +# InternVL +def run_internvl(question: str, modality: str): + assert modality == "image" + + model_name = "OpenGVLab/InternVL2-2B" + + llm = LLM( + model=model_name, + trust_remote_code=True, + max_model_len=4096, + mm_cache_preprocessor=args.mm_cache_preprocessor, + ) + + tokenizer = AutoTokenizer.from_pretrained(model_name, + trust_remote_code=True) + messages = [{'role': 'user', 'content': f"\n{question}"}] + prompt = tokenizer.apply_chat_template(messages, + tokenize=False, + add_generation_prompt=True) + + # Stop tokens for InternVL + # models variants may have different stop tokens + # please refer to the model card for the correct "stop words": + # https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/conversation.py + stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"] + stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens] + return llm, prompt, stop_token_ids + + # LLaVA-1.5 def run_llava(question: str, modality: str): assert modality == "image" @@ -75,83 +228,20 @@ def run_llava_onevision(question: str, modality: str): return llm, prompt, stop_token_ids -# Fuyu -def run_fuyu(question: str, modality: str): - assert modality == "image" - - prompt = f"{question}\n" - llm = LLM(model="adept/fuyu-8b", - max_model_len=2048, - max_num_seqs=2, - mm_cache_preprocessor=args.mm_cache_preprocessor) - stop_token_ids = None - return llm, prompt, stop_token_ids - - -# Phi-3-Vision -def run_phi3v(question: str, modality: str): +# Mantis +def run_mantis(question: str, modality: str): assert modality == "image" - prompt = f"<|user|>\n<|image_1|>\n{question}<|end|>\n<|assistant|>\n" + llama3_template = '<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' # noqa: E501 + prompt = llama3_template.format(f"{question}\n") - # num_crops is an override kwarg to the multimodal image processor; - # For some models, e.g., Phi-3.5-vision-instruct, it is recommended - # to use 16 for single frame scenarios, and 4 for multi-frame. - # - # Generally speaking, a larger value for num_crops results in more - # tokens per image instance, because it may scale the image more in - # the image preprocessing. Some references in the model docs and the - # formula for image tokens after the preprocessing - # transform can be found below. 
- # - # https://huggingface.co/microsoft/Phi-3.5-vision-instruct#loading-the-model-locally - # https://huggingface.co/microsoft/Phi-3.5-vision-instruct/blob/main/processing_phi3_v.py#L194 llm = LLM( - model="microsoft/Phi-3.5-vision-instruct", - trust_remote_code=True, + model="TIGER-Lab/Mantis-8B-siglip-llama3", max_model_len=4096, - max_num_seqs=2, - # Note - mm_processor_kwargs can also be passed to generate/chat calls - mm_processor_kwargs={"num_crops": 16}, + hf_overrides={"architectures": ["MantisForConditionalGeneration"]}, mm_cache_preprocessor=args.mm_cache_preprocessor, ) - stop_token_ids = None - return llm, prompt, stop_token_ids - - -# PaliGemma -def run_paligemma(question: str, modality: str): - assert modality == "image" - - # PaliGemma has special prompt format for VQA - prompt = "caption en" - llm = LLM(model="google/paligemma-3b-mix-224", - mm_cache_preprocessor=args.mm_cache_preprocessor) - stop_token_ids = None - return llm, prompt, stop_token_ids - - -# PaliGemma 2 -def run_paligemma2(question: str, modality: str): - assert modality == "image" - - # PaliGemma 2 has special prompt format for VQA - prompt = "caption en" - llm = LLM(model="google/paligemma2-3b-ft-docci-448", - mm_cache_preprocessor=args.mm_cache_preprocessor) - stop_token_ids = None - return llm, prompt, stop_token_ids - - -# Chameleon -def run_chameleon(question: str, modality: str): - assert modality == "image" - - prompt = f"{question}" - llm = LLM(model="facebook/chameleon-7b", - max_model_len=4096, - mm_cache_preprocessor=args.mm_cache_preprocessor) - stop_token_ids = None + stop_token_ids = [128009] return llm, prompt, stop_token_ids @@ -199,58 +289,45 @@ def run_minicpmv(question: str, modality: str): return llm, prompt, stop_token_ids -# H2OVL-Mississippi -def run_h2ovl(question: str, modality: str): +# LLama 3.2 +def run_mllama(question: str, modality: str): assert modality == "image" - model_name = "h2oai/h2ovl-mississippi-2b" + model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct" + # Note: The default setting of max_num_seqs (256) and + # max_model_len (131072) for this model may cause OOM. + # You may lower either to run this example on lower-end GPUs. + + # The configuration below has been confirmed to launch on a single L40 GPU. 
llm = LLM( model=model_name, - trust_remote_code=True, - max_model_len=8192, + max_model_len=4096, + max_num_seqs=16, + enforce_eager=True, mm_cache_preprocessor=args.mm_cache_preprocessor, ) - tokenizer = AutoTokenizer.from_pretrained(model_name, - trust_remote_code=True) - messages = [{'role': 'user', 'content': f"\n{question}"}] - prompt = tokenizer.apply_chat_template(messages, - tokenize=False, - add_generation_prompt=True) - - # Stop tokens for H2OVL-Mississippi - # https://huggingface.co/h2oai/h2ovl-mississippi-2b - stop_token_ids = [tokenizer.eos_token_id] + prompt = f"<|image|><|begin_of_text|>{question}" + stop_token_ids = None return llm, prompt, stop_token_ids -# InternVL -def run_internvl(question: str, modality: str): +# Molmo +def run_molmo(question, modality): assert modality == "image" - model_name = "OpenGVLab/InternVL2-2B" + model_name = "allenai/Molmo-7B-D-0924" llm = LLM( model=model_name, trust_remote_code=True, - max_model_len=4096, + dtype="bfloat16", mm_cache_preprocessor=args.mm_cache_preprocessor, ) - tokenizer = AutoTokenizer.from_pretrained(model_name, - trust_remote_code=True) - messages = [{'role': 'user', 'content': f"\n{question}"}] - prompt = tokenizer.apply_chat_template(messages, - tokenize=False, - add_generation_prompt=True) - - # Stop tokens for InternVL - # models variants may have different stop tokens - # please refer to the model card for the correct "stop words": - # https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/conversation.py - stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"] - stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens] + prompt = question + stop_token_ids = None return llm, prompt, stop_token_ids @@ -279,58 +356,57 @@ def run_nvlm_d(question: str, modality: str): return llm, prompt, stop_token_ids -# BLIP-2 -def run_blip2(question: str, modality: str): +# PaliGemma +def run_paligemma(question: str, modality: str): assert modality == "image" - # BLIP-2 prompt format is inaccurate on HuggingFace model repository. - # See https://huggingface.co/Salesforce/blip2-opt-2.7b/discussions/15#64ff02f3f8cf9e4f5b038262 #noqa - prompt = f"Question: {question} Answer:" - llm = LLM(model="Salesforce/blip2-opt-2.7b", + # PaliGemma has special prompt format for VQA + prompt = "caption en" + llm = LLM(model="google/paligemma-3b-mix-224", mm_cache_preprocessor=args.mm_cache_preprocessor) stop_token_ids = None return llm, prompt, stop_token_ids -# Qwen -def run_qwen_vl(question: str, modality: str): +# PaliGemma 2 +def run_paligemma2(question: str, modality: str): assert modality == "image" - llm = LLM( - model="Qwen/Qwen-VL", - trust_remote_code=True, - max_model_len=1024, - max_num_seqs=2, - mm_cache_preprocessor=args.mm_cache_preprocessor, - ) - - prompt = f"{question}Picture 1: \n" + # PaliGemma 2 has special prompt format for VQA + prompt = "caption en" + llm = LLM(model="google/paligemma2-3b-ft-docci-448", + mm_cache_preprocessor=args.mm_cache_preprocessor) stop_token_ids = None return llm, prompt, stop_token_ids -# Qwen2-VL -def run_qwen2_vl(question: str, modality: str): +# Phi-3-Vision +def run_phi3v(question: str, modality: str): assert modality == "image" - model_name = "Qwen/Qwen2-VL-7B-Instruct" + prompt = f"<|user|>\n<|image_1|>\n{question}<|end|>\n<|assistant|>\n" + # num_crops is an override kwarg to the multimodal image processor; + # For some models, e.g., Phi-3.5-vision-instruct, it is recommended + # to use 16 for single frame scenarios, and 4 for multi-frame. 
+ # + # Generally speaking, a larger value for num_crops results in more + # tokens per image instance, because it may scale the image more in + # the image preprocessing. Some references in the model docs and the + # formula for image tokens after the preprocessing + # transform can be found below. + # + # https://huggingface.co/microsoft/Phi-3.5-vision-instruct#loading-the-model-locally + # https://huggingface.co/microsoft/Phi-3.5-vision-instruct/blob/main/processing_phi3_v.py#L194 llm = LLM( - model=model_name, + model="microsoft/Phi-3.5-vision-instruct", + trust_remote_code=True, max_model_len=4096, - max_num_seqs=5, + max_num_seqs=2, # Note - mm_processor_kwargs can also be passed to generate/chat calls - mm_processor_kwargs={ - "min_pixels": 28 * 28, - "max_pixels": 1280 * 28 * 28, - }, + mm_processor_kwargs={"num_crops": 16}, mm_cache_preprocessor=args.mm_cache_preprocessor, ) - - prompt = ("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" - "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>" - f"{question}<|im_end|>\n" - "<|im_start|>assistant\n") stop_token_ids = None return llm, prompt, stop_token_ids @@ -352,149 +428,73 @@ def run_pixtral_hf(question: str, modality: str): return llm, prompt, stop_token_ids -# LLama 3.2 -def run_mllama(question: str, modality: str): - assert modality == "image" - - model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct" - - # Note: The default setting of max_num_seqs (256) and - # max_model_len (131072) for this model may cause OOM. - # You may lower either to run this example on lower-end GPUs. - - # The configuration below has been confirmed to launch on a single L40 GPU. - llm = LLM( - model=model_name, - max_model_len=4096, - max_num_seqs=16, - enforce_eager=True, - mm_cache_preprocessor=args.mm_cache_preprocessor, - ) - - prompt = f"<|image|><|begin_of_text|>{question}" - stop_token_ids = None - return llm, prompt, stop_token_ids - - -# Molmo -def run_molmo(question, modality): +# Qwen +def run_qwen_vl(question: str, modality: str): assert modality == "image" - model_name = "allenai/Molmo-7B-D-0924" - llm = LLM( - model=model_name, + model="Qwen/Qwen-VL", trust_remote_code=True, - dtype="bfloat16", + max_model_len=1024, + max_num_seqs=2, mm_cache_preprocessor=args.mm_cache_preprocessor, ) - prompt = question + prompt = f"{question}Picture 1: \n" stop_token_ids = None return llm, prompt, stop_token_ids -# GLM-4v -def run_glm4v(question: str, modality: str): +# Qwen2-VL +def run_qwen2_vl(question: str, modality: str): assert modality == "image" - model_name = "THUDM/glm-4v-9b" - llm = LLM(model=model_name, - max_model_len=2048, - max_num_seqs=2, - trust_remote_code=True, - enforce_eager=True, - mm_cache_preprocessor=args.mm_cache_preprocessor) - prompt = question - stop_token_ids = [151329, 151336, 151338] - return llm, prompt, stop_token_ids - - -# Idefics3-8B-Llama3 -def run_idefics3(question: str, modality: str): - assert modality == "image" - model_name = "HuggingFaceM4/Idefics3-8B-Llama3" + model_name = "Qwen/Qwen2-VL-7B-Instruct" llm = LLM( model=model_name, - max_model_len=8192, - max_num_seqs=2, - enforce_eager=True, - # if you are running out of memory, you can reduce the "longest_edge". 
- # see: https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3#model-optimizations + max_model_len=4096, + max_num_seqs=5, + # Note - mm_processor_kwargs can also be passed to generate/chat calls mm_processor_kwargs={ - "size": { - "longest_edge": 3 * 364 - }, + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, }, mm_cache_preprocessor=args.mm_cache_preprocessor, ) - prompt = ( - f"<|begin_of_text|>User:{question}\nAssistant:" - ) - stop_token_ids = None - return llm, prompt, stop_token_ids - -# Aria -def run_aria(question: str, modality: str): - assert modality == "image" - model_name = "rhymes-ai/Aria" - - llm = LLM(model=model_name, - tokenizer_mode="slow", - trust_remote_code=True, - dtype="bfloat16", - mm_cache_preprocessor=args.mm_cache_preprocessor) - - prompt = (f"<|im_start|>user\n<|img|>\n{question}" - "<|im_end|>\n<|im_start|>assistant\n") - - stop_token_ids = [93532, 93653, 944, 93421, 1019, 93653, 93519] - return llm, prompt, stop_token_ids - - -# Mantis -def run_mantis(question: str, modality: str): - assert modality == "image" - - llama3_template = '<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' # noqa: E501 - prompt = llama3_template.format(f"{question}\n") - - llm = LLM( - model="TIGER-Lab/Mantis-8B-siglip-llama3", - max_model_len=4096, - hf_overrides={"architectures": ["MantisForConditionalGeneration"]}, - mm_cache_preprocessor=args.mm_cache_preprocessor, - ) - stop_token_ids = [128009] + prompt = ("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n") + stop_token_ids = None return llm, prompt, stop_token_ids model_example_map = { + "aria": run_aria, + "blip-2": run_blip2, + "chameleon": run_chameleon, + "fuyu": run_fuyu, + "glm4v": run_glm4v, + "h2ovl_chat": run_h2ovl, + "idefics3": run_idefics3, + "internvl_chat": run_internvl, "llava": run_llava, "llava-next": run_llava_next, "llava-next-video": run_llava_next_video, "llava-onevision": run_llava_onevision, - "fuyu": run_fuyu, - "phi3_v": run_phi3v, - "paligemma": run_paligemma, - "paligemma2": run_paligemma2, - "chameleon": run_chameleon, + "mantis": run_mantis, "minicpmv": run_minicpmv, - "blip-2": run_blip2, - "h2ovl_chat": run_h2ovl, - "internvl_chat": run_internvl, + "mllama": run_mllama, + "molmo": run_molmo, "NVLM_D": run_nvlm_d, + "paligemma": run_paligemma, + "paligemma2": run_paligemma2, + "phi3_v": run_phi3v, + "pixtral_hf": run_pixtral_hf, "qwen_vl": run_qwen_vl, "qwen2_vl": run_qwen2_vl, - "pixtral_hf": run_pixtral_hf, - "mllama": run_mllama, - "molmo": run_molmo, - "glm4v": run_glm4v, - "idefics3": run_idefics3, - "aria": run_aria, - "mantis": run_mantis, } diff --git a/examples/offline_inference_vision_language_multi_image.py b/examples/offline_inference_vision_language_multi_image.py index 928bbef54eab7..6af8d7768e75d 100644 --- a/examples/offline_inference_vision_language_multi_image.py +++ b/examples/offline_inference_vision_language_multi_image.py @@ -33,78 +33,23 @@ class ModelRequestData(NamedTuple): # Unless specified, these settings have been tested to work on a single L4. 
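# A minimal consumption sketch for the loaders below (illustrative only;
# `image_urls`, the question text, and the sampling values are assumptions,
# and the script's real driver code may differ):
req = load_phi3v("What is shown in these images?", image_urls)
sampling_params = SamplingParams(temperature=0.0,
                                 max_tokens=128,
                                 stop_token_ids=req.stop_token_ids)
outputs = req.llm.generate(
    {
        "prompt": req.prompt,
        "multi_modal_data": {"image": req.image_data},
    },
    sampling_params=sampling_params)
for o in outputs:
    print(o.outputs[0].text)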
-def load_qwenvl_chat(question: str, image_urls: List[str]) -> ModelRequestData: - model_name = "Qwen/Qwen-VL-Chat" - llm = LLM( - model=model_name, - trust_remote_code=True, - max_model_len=1024, - max_num_seqs=2, - limit_mm_per_prompt={"image": len(image_urls)}, - ) - placeholders = "".join(f"Picture {i}: \n" - for i, _ in enumerate(image_urls, start=1)) - - # This model does not have a chat_template attribute on its tokenizer, - # so we need to explicitly pass it. We use ChatML since it's used in the - # generation utils of the model: - # https://huggingface.co/Qwen/Qwen-VL-Chat/blob/main/qwen_generation_utils.py#L265 - tokenizer = AutoTokenizer.from_pretrained(model_name, - trust_remote_code=True) - - # Copied from: https://huggingface.co/docs/transformers/main/en/chat_templating - chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" # noqa: E501 - - messages = [{'role': 'user', 'content': f"{placeholders}\n{question}"}] - prompt = tokenizer.apply_chat_template(messages, - tokenize=False, - add_generation_prompt=True, - chat_template=chat_template) - - stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>"] - stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens] - return ModelRequestData( - llm=llm, - prompt=prompt, - stop_token_ids=stop_token_ids, - image_data=[fetch_image(url) for url in image_urls], - chat_template=chat_template, - ) - - -def load_phi3v(question: str, image_urls: List[str]) -> ModelRequestData: - # num_crops is an override kwarg to the multimodal image processor; - # For some models, e.g., Phi-3.5-vision-instruct, it is recommended - # to use 16 for single frame scenarios, and 4 for multi-frame. - # - # Generally speaking, a larger value for num_crops results in more - # tokens per image instance, because it may scale the image more in - # the image preprocessing. Some references in the model docs and the - # formula for image tokens after the preprocessing - # transform can be found below. 
- # - # https://huggingface.co/microsoft/Phi-3.5-vision-instruct#loading-the-model-locally - # https://huggingface.co/microsoft/Phi-3.5-vision-instruct/blob/main/processing_phi3_v.py#L194 - llm = LLM( - model="microsoft/Phi-3.5-vision-instruct", - trust_remote_code=True, - max_model_len=4096, - max_num_seqs=2, - limit_mm_per_prompt={"image": len(image_urls)}, - mm_processor_kwargs={"num_crops": 4}, - ) - placeholders = "\n".join(f"<|image_{i}|>" - for i, _ in enumerate(image_urls, start=1)) - prompt = f"<|user|>\n{placeholders}\n{question}<|end|>\n<|assistant|>\n" - stop_token_ids = None - +def load_aria(question, image_urls: List[str]) -> ModelRequestData: + model_name = "rhymes-ai/Aria" + llm = LLM(model=model_name, + tokenizer_mode="slow", + trust_remote_code=True, + dtype="bfloat16", + limit_mm_per_prompt={"image": len(image_urls)}) + placeholders = "<|img|>\n" * len(image_urls) + prompt = (f"<|im_start|>user\n{placeholders}{question}<|im_end|>\n" + "<|im_start|>assistant\n") + stop_token_ids = [93532, 93653, 944, 93421, 1019, 93653, 93519] return ModelRequestData( llm=llm, prompt=prompt, stop_token_ids=stop_token_ids, image_data=[fetch_image(url) for url in image_urls], - chat_template=None, - ) + chat_template=None) def load_h2onvl(question: str, image_urls: List[str]) -> ModelRequestData: @@ -141,6 +86,37 @@ def load_h2onvl(question: str, image_urls: List[str]) -> ModelRequestData: ) +def load_idefics3(question, image_urls: List[str]) -> ModelRequestData: + model_name = "HuggingFaceM4/Idefics3-8B-Llama3" + + # The configuration below has been confirmed to launch on a single L40 GPU. + llm = LLM( + model=model_name, + max_model_len=8192, + max_num_seqs=16, + enforce_eager=True, + limit_mm_per_prompt={"image": len(image_urls)}, + # if you are running out of memory, you can reduce the "longest_edge". + # see: https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3#model-optimizations + mm_processor_kwargs={ + "size": { + "longest_edge": 2 * 364 + }, + }, + ) + + placeholders = "\n".join(f"Image-{i}: \n" + for i, _ in enumerate(image_urls, start=1)) + prompt = f"<|begin_of_text|>User:{placeholders}\n{question}\nAssistant:" # noqa: E501 + return ModelRequestData( + llm=llm, + prompt=prompt, + stop_token_ids=None, + image_data=[fetch_image(url) for url in image_urls], + chat_template=None, + ) + + def load_internvl(question: str, image_urls: List[str]) -> ModelRequestData: model_name = "OpenGVLab/InternVL2-2B" @@ -178,6 +154,28 @@ def load_internvl(question: str, image_urls: List[str]) -> ModelRequestData: ) +def load_mllama(question, image_urls: List[str]) -> ModelRequestData: + model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct" + + # The configuration below has been confirmed to launch on a single L40 GPU. 
+ llm = LLM( + model=model_name, + max_model_len=4096, + max_num_seqs=16, + enforce_eager=True, + limit_mm_per_prompt={"image": len(image_urls)}, + ) + + prompt = f"<|image|><|image|><|begin_of_text|>{question}" + return ModelRequestData( + llm=llm, + prompt=prompt, + stop_token_ids=None, + image_data=[fetch_image(url) for url in image_urls], + chat_template=None, + ) + + def load_nvlm_d(question: str, image_urls: List[str]): model_name = "nvidia/NVLM-D-72B" @@ -211,6 +209,80 @@ def load_nvlm_d(question: str, image_urls: List[str]): ) +def load_phi3v(question: str, image_urls: List[str]) -> ModelRequestData: + # num_crops is an override kwarg to the multimodal image processor; + # For some models, e.g., Phi-3.5-vision-instruct, it is recommended + # to use 16 for single frame scenarios, and 4 for multi-frame. + # + # Generally speaking, a larger value for num_crops results in more + # tokens per image instance, because it may scale the image more in + # the image preprocessing. Some references in the model docs and the + # formula for image tokens after the preprocessing + # transform can be found below. + # + # https://huggingface.co/microsoft/Phi-3.5-vision-instruct#loading-the-model-locally + # https://huggingface.co/microsoft/Phi-3.5-vision-instruct/blob/main/processing_phi3_v.py#L194 + llm = LLM( + model="microsoft/Phi-3.5-vision-instruct", + trust_remote_code=True, + max_model_len=4096, + max_num_seqs=2, + limit_mm_per_prompt={"image": len(image_urls)}, + mm_processor_kwargs={"num_crops": 4}, + ) + placeholders = "\n".join(f"<|image_{i}|>" + for i, _ in enumerate(image_urls, start=1)) + prompt = f"<|user|>\n{placeholders}\n{question}<|end|>\n<|assistant|>\n" + stop_token_ids = None + + return ModelRequestData( + llm=llm, + prompt=prompt, + stop_token_ids=stop_token_ids, + image_data=[fetch_image(url) for url in image_urls], + chat_template=None, + ) + + +def load_qwenvl_chat(question: str, image_urls: List[str]) -> ModelRequestData: + model_name = "Qwen/Qwen-VL-Chat" + llm = LLM( + model=model_name, + trust_remote_code=True, + max_model_len=1024, + max_num_seqs=2, + limit_mm_per_prompt={"image": len(image_urls)}, + ) + placeholders = "".join(f"Picture {i}: \n" + for i, _ in enumerate(image_urls, start=1)) + + # This model does not have a chat_template attribute on its tokenizer, + # so we need to explicitly pass it. 
We use ChatML since it's used in the + # generation utils of the model: + # https://huggingface.co/Qwen/Qwen-VL-Chat/blob/main/qwen_generation_utils.py#L265 + tokenizer = AutoTokenizer.from_pretrained(model_name, + trust_remote_code=True) + + # Copied from: https://huggingface.co/docs/transformers/main/en/chat_templating + chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" # noqa: E501 + + messages = [{'role': 'user', 'content': f"{placeholders}\n{question}"}] + prompt = tokenizer.apply_chat_template(messages, + tokenize=False, + add_generation_prompt=True, + chat_template=chat_template) + + stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>"] + stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens] + return ModelRequestData( + llm=llm, + prompt=prompt, + stop_token_ids=stop_token_ids, + image_data=[fetch_image(url) for url in image_urls], + chat_template=chat_template, + ) + + def load_qwen2_vl(question, image_urls: List[str]) -> ModelRequestData: try: from qwen_vl_utils import process_vision_info @@ -268,88 +340,16 @@ def load_qwen2_vl(question, image_urls: List[str]) -> ModelRequestData: ) -def load_mllama(question, image_urls: List[str]) -> ModelRequestData: - model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct" - - # The configuration below has been confirmed to launch on a single L40 GPU. - llm = LLM( - model=model_name, - max_model_len=4096, - max_num_seqs=16, - enforce_eager=True, - limit_mm_per_prompt={"image": len(image_urls)}, - ) - - prompt = f"<|image|><|image|><|begin_of_text|>{question}" - return ModelRequestData( - llm=llm, - prompt=prompt, - stop_token_ids=None, - image_data=[fetch_image(url) for url in image_urls], - chat_template=None, - ) - - -def load_idefics3(question, image_urls: List[str]) -> ModelRequestData: - model_name = "HuggingFaceM4/Idefics3-8B-Llama3" - - # The configuration below has been confirmed to launch on a single L40 GPU. - llm = LLM( - model=model_name, - max_model_len=8192, - max_num_seqs=16, - enforce_eager=True, - limit_mm_per_prompt={"image": len(image_urls)}, - # if you are running out of memory, you can reduce the "longest_edge". 
- # see: https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3#model-optimizations - mm_processor_kwargs={ - "size": { - "longest_edge": 2 * 364 - }, - }, - ) - - placeholders = "\n".join(f"Image-{i}: \n" - for i, _ in enumerate(image_urls, start=1)) - prompt = f"<|begin_of_text|>User:{placeholders}\n{question}\nAssistant:" # noqa: E501 - return ModelRequestData( - llm=llm, - prompt=prompt, - stop_token_ids=None, - image_data=[fetch_image(url) for url in image_urls], - chat_template=None, - ) - - -def load_aria(question, image_urls: List[str]) -> ModelRequestData: - model_name = "rhymes-ai/Aria" - llm = LLM(model=model_name, - tokenizer_mode="slow", - trust_remote_code=True, - dtype="bfloat16", - limit_mm_per_prompt={"image": len(image_urls)}) - placeholders = "<|img|>\n" * len(image_urls) - prompt = (f"<|im_start|>user\n{placeholders}{question}<|im_end|>\n" - "<|im_start|>assistant\n") - stop_token_ids = [93532, 93653, 944, 93421, 1019, 93653, 93519] - return ModelRequestData( - llm=llm, - prompt=prompt, - stop_token_ids=stop_token_ids, - image_data=[fetch_image(url) for url in image_urls], - chat_template=None) - - model_example_map = { - "phi3_v": load_phi3v, + "aria": load_aria, "h2ovl_chat": load_h2onvl, + "idefics3": load_idefics3, "internvl_chat": load_internvl, + "mllama": load_mllama, "NVLM_D": load_nvlm_d, - "qwen2_vl": load_qwen2_vl, + "phi3_v": load_phi3v, "qwen_vl_chat": load_qwenvl_chat, - "mllama": load_mllama, - "idefics3": load_idefics3, - "aria": load_aria, + "qwen2_vl": load_qwen2_vl, } From efbce85f4d375d7851a491a0126a224e25d9f91d Mon Sep 17 00:00:00 2001 From: Varun Sundar Rabindranath Date: Mon, 16 Dec 2024 13:14:57 -0500 Subject: [PATCH 15/72] [misc] Layerwise profile updates (#10242) Signed-off-by: Varun Sundar Rabindranath Co-authored-by: Varun Sundar Rabindranath --- .buildkite/test-pipeline.yaml | 2 +- examples/offline_profile.py | 236 +++++++++++++++--- tools/profiler/print_layerwise_table.py | 9 +- tools/profiler/visualize_layerwise_profile.py | 92 ++++++- vllm/profiler/layerwise_profile.py | 22 +- 5 files changed, 314 insertions(+), 47 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 97aae233db105..44f47fac1c1b3 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -201,7 +201,7 @@ steps: - python3 offline_inference_classification.py - python3 offline_inference_embedding.py - python3 offline_inference_scoring.py - - python3 offline_profile.py --model facebook/opt-125m + - python3 offline_profile.py --model facebook/opt-125m run_num_steps --num-steps 2 - label: Prefix Caching Test # 9min mirror_hardwares: [amd] diff --git a/examples/offline_profile.py b/examples/offline_profile.py index 1d415b82cddb6..46afe8aa2604b 100644 --- a/examples/offline_profile.py +++ b/examples/offline_profile.py @@ -4,9 +4,10 @@ import sys from argparse import RawTextHelpFormatter from dataclasses import asdict, dataclass -from typing import Optional +from typing import Any, Dict, Generator, List, Optional, TypeAlias import torch +import tqdm from vllm import LLM, SamplingParams from vllm.engine.arg_utils import EngineArgs @@ -15,16 +16,21 @@ BATCH_SIZE_DEFAULT = 1 PROMPT_LEN_DEFAULT = 256 -OUTPUT_LEN_DEFAULT = 2 @dataclass class ProfileContext: engine_args: EngineArgs prompt_len: int - output_len: int batch_size: int - save_chrome_traces_folder: Optional[str] + + # The profiler can run in 2 modes, + # 1. Run profiler for user specified num_steps + num_steps: Optional[int] = None + # 2. 
Run profiler until all requests complete + complete_num_requests_per_step: Optional[int] = None + + save_chrome_traces_folder: Optional[str] = None def get_dtype(dtype: str): @@ -34,23 +40,155 @@ def get_dtype(dtype: str): return dtype +OutputLen_NumReqs_Map: TypeAlias = Dict[int, int] +def compute_request_output_lengths(batch_size: int, step_requests: List[int]) \ + -> OutputLen_NumReqs_Map: + """ + Given the number of requests, batch_size, and the number of requests + that each engine-step should process, step_requests, determine the + output lengths of the requests such that step_request is honoured. + + Example: + if batch size = 128 and step_request = [128, 128, 96, 64, 32, 1] + then return, + {2 : 32, 3 : 32, 4 : 32, 5 : 31, 6 : 1}, meaning, + 32 requests should have output length 2, + 32 requests should have output length 3, + 32 requests should have output length 4, + 31 requests should have output length 5, + 1 request should have output length 6. + + Args: + batch_size (int): Number of requests submitted for profile. This is + args.batch_size. + step_requests (List[int]): step_requests[i] is the number of requests + that the ith engine step should process. + + Returns: + OutputLen_NumReqs_Map : A dictionary with output-length as keys and the + number of requests required to have that output-length as values. + """ + ol_nr: OutputLen_NumReqs_Map = {} + + # Number of request that are assigned an output-length + num_reqs_assigned: int = 0 + num_steps: int = len(step_requests) + + # sanity check. The first step (prefill-step), must process all requests. + assert step_requests[0] == batch_size + + # Begin assignments from the last step. + output_length: int = num_steps + for num_requests_at_step in reversed(step_requests): + if num_reqs_assigned == batch_size: + break + + assert num_reqs_assigned < batch_size + + # Remove the number of requests that have been determined + # to participate in this step and beyond. + num_reqs_unassigned_at_step = num_requests_at_step - num_reqs_assigned + assert num_reqs_unassigned_at_step >= 0 + + if num_reqs_unassigned_at_step > 0: + ol_nr[output_length] = num_reqs_unassigned_at_step + num_reqs_assigned += num_reqs_unassigned_at_step + + output_length -= 1 + + # sanity checks. + assert sum(ol_nr.values()) == batch_size, \ + ("Number of requests in output-length assignment does not match " + f"batch-size.\n batch size {batch_size} - " + f"step requests {step_requests} - assignments {ol_nr}") + + # Check that the output-length is in [1, num-steps]. Output length must be + # at least 1 as all requests must participate in the prefill-step. + assert all(ol >= 1 and ol <= num_steps for ol in ol_nr), \ + ("Output lengths of requests should be in range " + f"[1, num-engine-steps].\n batch size {batch_size} - " + f"step requests {step_requests} - assignments {ol_nr}") + + return ol_nr + + +def determine_requests_per_step(context: ProfileContext) -> List[int]: + """ + Determine number of requests each engine step should process. + If context.num_steps is set, then all engine steps process the + same number of requests and the output list is of length + context.num_steps. + + If context.complete_num_requests_per_step is set, then each decode step + processes fewer and fewer requests until there are no requests to process. + In this case, the output list is as big as the number of steps + required to process all requests. + + Args: + context: ProfileContext object. + + Returns: + List[int]: Number of requests to process for all engine-steps. 
+ output[i], contains the number of requests that the ith step + should process. + """ + if context.num_steps: + # All requests must run until num_engine_steps. This implies + # that their output lengths must be equal to num_engine_steps. + return [context.batch_size] * context.num_steps + + assert context.complete_num_requests_per_step and \ + context.complete_num_requests_per_step > 0, \ + (f"Expected a positive complete_num_requests_per_step argument." + f"Instead got {context.complete_num_requests_per_step}") + + # We start dropping after the first decode step. + step_requests = [ + context.batch_size, # prefill + context.batch_size, # decode + ] + + num_running_requests = context.batch_size + num_running_requests -= context.complete_num_requests_per_step + while num_running_requests > 0: + step_requests.append(num_running_requests) + num_running_requests -= context.complete_num_requests_per_step + + if step_requests[-1] != 1: + # have 1 request running at the last step. This is often + # useful + step_requests.append(1) + + return step_requests + + def run_profile(context: ProfileContext, csv_output: Optional[str], json_output: Optional[str]): print("Run profile with:") for key, value in asdict(context).items(): print(f" {key} = {value}") + requests_per_step: List[int] = determine_requests_per_step(context) + + ol_nr: OutputLen_NumReqs_Map = compute_request_output_lengths( + context.batch_size, requests_per_step) + + num_steps_to_profile: int = len(requests_per_step) + max_output_len: int = max(ol_nr.keys()) + assert max_output_len >= 1 + # Create sampling params - sampling_params = SamplingParams(temperature=0.8, - top_p=0.95, - max_tokens=args.output_len, - ignore_eos=True) + sampling_params = SamplingParams( + temperature=0.8, + top_p=0.95, + # max_tokens is set on a per-request basis. 
+ max_tokens=None, + ignore_eos=True) # Create LLM llm = LLM(**asdict(context.engine_args)) batch_size = context.batch_size prompt_len = context.prompt_len - output_len = context.output_len scheduler_config = llm.llm_engine.scheduler_config max_model_len = llm.llm_engine.model_config.max_model_len @@ -65,7 +203,7 @@ def run_profile(context: ProfileContext, csv_output: Optional[str], f"choose a smaller batch size or prompt length, or increase " f"--max-num-batched-tokens") sys.exit(-1) - if batch_size >= max_num_seqs: + if batch_size > max_num_seqs: print( f"ERROR: chosen batch_size ({batch_size}) is larger than " f"max_num_seqs ({max_num_seqs}) and therefore cannot be run in a " @@ -73,16 +211,26 @@ def run_profile(context: ProfileContext, csv_output: Optional[str], sys.exit(-1) print("llm.llm_engine.model_config.max_model_len: ", llm.llm_engine.model_config.max_model_len) - if prompt_len + output_len > llm.llm_engine.model_config.max_model_len: - print( - f"ERROR: chosen prompt_len + output_len ({prompt_len} + " - f"{output_len} = {prompt_len + output_len}) is larger than the " - f"model's max_model_len ({max_model_len}), please choose a smaller " - f"prompt_len or output_len, or increase --max-model-len") + if prompt_len + max_output_len > llm.llm_engine.model_config.max_model_len: + print(f"ERROR: chosen prompt_len + max_output_len ({prompt_len} + " + f"{max_output_len} = {prompt_len + max_output_len}) is larger " + f"than the model's max_model_len ({max_model_len}), please " + f"choose a smaller prompt_len or max_output_len, or increase " + f"--max-model-len") sys.exit(-1) def add_requests(): + + def get_output_len_generator() -> Generator[int, Any, Any]: + for output_len, num_reqs in ol_nr.items(): + for _ in range(num_reqs): + yield output_len + + output_len_generator = get_output_len_generator() for i in range(batch_size): + sampling_params.max_tokens = next(output_len_generator) + assert isinstance(sampling_params.max_tokens, int) + prompt_token_ids = torch.randint( llm.llm_engine.model_config.get_vocab_size(), size=(prompt_len, )).tolist() @@ -110,8 +258,11 @@ def abort_requests(): llm.llm_engine.step() # First step is prefill decode_profs = [] - for x in range(args.output_len - 1): - with layerwise_profile() as decode_prof: + for _ in tqdm.tqdm(range(num_steps_to_profile - 1)): + num_running_seqs = llm.llm_engine.scheduler[ + 0].get_num_unfinished_seq_groups() + with layerwise_profile( + num_running_seqs=num_running_seqs) as decode_prof: llm.llm_engine.step() decode_profs.append(decode_prof) @@ -154,7 +305,8 @@ def abort_requests(): decode_results_list[0].print_summary_table() if csv_output: - csv_filename_base = csv_output.rstrip(".csv") + csv_filename_base = csv_output[:-4] \ + if csv_output.endswith('.csv') else csv_output prefill_results.export_model_stats_table_csv( csv_filename_base + "_prefill_model_table.csv") prefill_results.export_summary_stats_table_csv( @@ -187,10 +339,10 @@ def abort_requests(): for idx, dr in enumerate(decode_results_list): json_dict[f"decode_{idx + 1}"] = dr.convert_stats_to_dict() - for idx, dr in enumerate(decode_results_list[1:]): - json_dict[f"decode_{idx + 1}"] = dr.convert_stats_to_dict() - - with open(json_output.rstrip(".json") + ".json", "w+") as f: + # Add .json to json_output filename if it doesn't exist already. 
+ json_output_file = json_output if json_output.endswith( + '.json') else json_output + '.json' + with open(json_output_file, "w+") as f: json.dump(json_dict, f, indent=2) pass @@ -214,7 +366,7 @@ def abort_requests(): python examples/offline_profile.py \\ --model neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8 --batch-size 4 \\ --prompt-len 512 --max-num-batched-tokens 8196 --json Llama31-8b-FP8 \\ - --enforce-eager + --enforce-eager run_num_steps -n 2 ``` then you can use various tools to analyze the json output @@ -261,17 +413,41 @@ def abort_requests(): default=BATCH_SIZE_DEFAULT, help=f"Number of requests to run as a single batch, " f"default={BATCH_SIZE_DEFAULT}") - parser.add_argument( - "--output-len", + + subparsers = parser.add_subparsers(dest="cmd") + + run_num_steps_parser = subparsers.add_parser( + "run_num_steps", + help="This variation profiles n engine.step() invocations.") + run_num_steps_parser.add_argument( + '-n', + '--num-steps', type=int, - default=OUTPUT_LEN_DEFAULT, - help="Number of llm steps to run (includes prefill and decode) " - "- default={OUTPUT_LEN_DEFAULT}") + help="Number of engine steps to profile.\n" + "Setting it to 1, profiles only the prefill step.\n" + "Setting it to 2, profiles the prefill and first decode step\n" + "Setting it to 3, profiles the prefill, 1st and 2nd decode steps\n" + "and so on ...") + + run_to_completion_parser = subparsers.add_parser( + "run_to_completion", + help="This variation profiles all the engine.step() invocations" + "until the engine exhausts all submitted requests.") + run_to_completion_parser.add_argument( + '-n', + '--complete-num-requests-per-step', + type=int, + help= + "Complete complete_num_requests_per_step requests every decode step." + "For e.g., with batch_size 128 and complete_num_requests_per_step 32," + "the profiler is run for 6 engine steps, with the steps processing, " + "128, 128, 96, 64, 32, 1 requests respectively.\n" + "Note that we tack-on a one-request step at the end as it is often " + "useful.") EngineArgs.add_cli_args(parser) args = parser.parse_args() - context = ProfileContext( engine_args=EngineArgs.from_cli_args(args), **{ diff --git a/tools/profiler/print_layerwise_table.py b/tools/profiler/print_layerwise_table.py index 081076ad7dbdc..394ca8663e189 100644 --- a/tools/profiler/print_layerwise_table.py +++ b/tools/profiler/print_layerwise_table.py @@ -34,9 +34,10 @@ def get_entries(node, curr_depth=0): "examples/offline_profile.py") parser.add_argument("--phase", type=str, - choices=["prefill", "decode_1"], required=True, - help="The phase to print the table for.") + help="The phase to print the table for. This is either" + "prefill or decode_n, where n is the decode step " + "number") parser.add_argument("--table", type=str, choices=["summary", "model"], @@ -49,6 +50,10 @@ def get_entries(node, curr_depth=0): with open(args.json_trace) as f: profile_data = json.load(f) + assert args.phase in profile_data, \ + (f"Cannot find phase {args.phase} in profile data. 
Choose one among" + f'{[x for x in profile_data.keys() if "prefill" in x or "decode" in x]}') #noqa + if args.table == "summary": entries_and_depths = flatten_entries( SummaryStatsEntry, profile_data[args.phase]["summary_stats"]) diff --git a/tools/profiler/visualize_layerwise_profile.py b/tools/profiler/visualize_layerwise_profile.py index adc44474aa4c1..da7a28da15c19 100644 --- a/tools/profiler/visualize_layerwise_profile.py +++ b/tools/profiler/visualize_layerwise_profile.py @@ -151,16 +151,31 @@ def is_quant(op_name: str): "scaled_int8_quant" in op_name: return True + # LoRA ops + def is_sgmv_shrink(op_name: str): + return "sgmv_shrink" in op_name + + def is_sgmv_expand(op_name: str): + return "sgmv_expand" in op_name + + def is_bgmv_shrink(op_name: str): + return "bgmv_shrink" in op_name + + def is_bgmv_expand(op_name: str): + return "bgmv_expand" in op_name + + def is_cutlass_gemm_op(op_name: str): + return "void cutlass::Kernel" in op_name or \ + "void cutlass::device_kernel" in op_name + def is_gemm_op(op_name: str): if is_quant(op_name): return False - if "xmma_gemm" in op_name or \ + return is_cutlass_gemm_op(op_name) or \ + "xmma_gemm" in op_name or \ "gemv2T_kernel" in op_name or \ "splitKreduce" in op_name or \ - "void cutlass::Kernel" in op_name or \ - "void cutlass::device_kernel" in op_name or \ - "s16816gemm" in op_name: - return True + "s16816gemm" in op_name def is_elementwise_op(op_name: str): return "elementwise_kernel" in op_name @@ -211,6 +226,18 @@ def is_reduce_kernel(op_name: str): quant_ops = list(filter(lambda x: is_quant(x), ops)) ops = list(filter(lambda x: x not in quant_ops, ops)) + sgmv_shrink_ops = list(filter(lambda x: is_sgmv_shrink(x), ops)) + ops = list(filter(lambda x: x not in sgmv_shrink_ops, ops)) + sgmv_expand_ops = list(filter(lambda x: is_sgmv_expand(x), ops)) + ops = list(filter(lambda x: x not in sgmv_expand_ops, ops)) + bgmv_shrink_ops = list(filter(lambda x: is_bgmv_shrink(x), ops)) + ops = list(filter(lambda x: x not in bgmv_shrink_ops, ops)) + bgmv_expand_ops = list(filter(lambda x: is_bgmv_expand(x), ops)) + ops = list(filter(lambda x: x not in bgmv_expand_ops, ops)) + + cutlass_gemm_ops = list(filter(lambda x: is_cutlass_gemm_op(x), ops)) + ops = list(filter(lambda x: x not in cutlass_gemm_ops, ops)) + gemm_ops = list(filter(lambda x: is_gemm_op(x), ops)) ops = list(filter(lambda x: x not in gemm_ops, ops)) @@ -257,6 +284,24 @@ def is_reduce_kernel(op_name: str): trace_df['attention'] = trace_df[attention_ops].agg("sum", axis=1) if len(quant_ops): trace_df['quant_ops'] = trace_df[quant_ops].agg("sum", axis=1) + + if len(sgmv_shrink_ops): + trace_df['sgmv_shrink_ops'] = trace_df[sgmv_shrink_ops].agg("sum", + axis=1) + if len(sgmv_expand_ops): + trace_df['sgmv_expand_ops'] = trace_df[sgmv_expand_ops].agg("sum", + axis=1) + if len(bgmv_shrink_ops): + trace_df['bgmv_shrink_ops'] = trace_df[bgmv_shrink_ops].agg("sum", + axis=1) + if len(bgmv_expand_ops): + trace_df['bgmv_expand_ops'] = trace_df[bgmv_expand_ops].agg("sum", + axis=1) + + if len(cutlass_gemm_ops): + trace_df['cutlass_gemm_ops'] = trace_df[cutlass_gemm_ops].agg("sum", + axis=1) + if len(gemm_ops): trace_df['gemm_ops'] = trace_df[gemm_ops].agg("sum", axis=1) if len(rms_norm_ops): @@ -296,7 +341,9 @@ def is_reduce_kernel(op_name: str): trace_df['reduce_kernel_ops'] = trace_df[reduce_kernel_ops].agg("sum", axis=1) - trace_df.drop(attention_ops + quant_ops + gemm_ops + rms_norm_ops + + trace_df.drop(attention_ops + quant_ops + sgmv_shrink_ops + + sgmv_expand_ops + bgmv_shrink_ops + 
bgmv_expand_ops + + cutlass_gemm_ops + gemm_ops + rms_norm_ops + vocab_embed_ops + mem_ops + elementwise_ops + nccl_all_reduce_ops + nccl_gather_ops + nccl_broadcast_ops + nccl_other_ops + cross_device_reduce_1stage_ops + @@ -315,7 +362,14 @@ def plot_trace_df(traces_df: pd.DataFrame, plot_title: str, output: Optional[Path] = None): + def get_phase_description(traces_df: pd.DataFrame, phase: str) -> str: + phase_df = traces_df.query(f'phase == "{phase}"') + descs = phase_df['phase_desc'].to_list() + assert all([desc == descs[0] for desc in descs]) + return descs[0] + phases = traces_df['phase'].unique() + phase_descs = [get_phase_description(traces_df, p) for p in phases] traces_df = traces_df.pivot_table(index="phase", columns="name", values=plot_metric, @@ -324,7 +378,8 @@ def plot_trace_df(traces_df: pd.DataFrame, traces_df = group_trace_by_operations(traces_df) # Make the figure - fig, ax = plt.subplots(1, figsize=(5, 8), sharex=True) + fig_size_x = max(5, len(phases)) + fig, ax = plt.subplots(1, figsize=(fig_size_x, 8), sharex=True) # Draw the stacked bars ops = list(traces_df) @@ -332,7 +387,7 @@ def plot_trace_df(traces_df: pd.DataFrame, for op in ops: values = [traces_df[op][phase] for phase in phases] values = list(map(lambda x: 0.0 if math.isnan(x) else x, values)) - ax.bar(phases, values, label=op, bottom=bottom) + ax.bar(phase_descs, values, label=op, bottom=bottom) bottom = [bottom[j] + values[j] for j in range(len(phases))] # Write the values as text on the bars @@ -390,6 +445,14 @@ def keep_only_top_entries(df: pd.DataFrame, ["name"]] = "others" return df + def get_phase_description(key: str) -> str: + num_running_seqs = profile_json[key]['metadata'][ + 'num_running_seqs'] + if num_running_seqs is not None: + return f"{key}-seqs-{num_running_seqs}" + else: + return key + # Get data for each key traces = list(map(lambda x: get_entries_and_traces(x), step_keys)) @@ -413,6 +476,7 @@ def keep_only_top_entries(df: pd.DataFrame, # Fill in information about the step-keys for trace_df, step_key in zip(trace_dfs, step_keys): trace_df['phase'] = step_key + trace_df['phase_desc'] = get_phase_description(step_key) # Combine all data frames so they can be put in a single plot traces_df = pd.concat(trace_dfs) @@ -426,12 +490,16 @@ def keep_only_top_entries(df: pd.DataFrame, def make_plot_title_suffix(profile_json: dict) -> str: context = profile_json["context"] sparsity = context.get('sparsity', None) - return (f"{context['model']}\n" + run_type = \ + f'Run {context["num_steps"]} steps' if context['num_steps'] else \ + (f'Complete {context["complete_num_requests_per_step"]} per ' + f'step; Run till completion') + return (f"{context['engine_args']['model']}\n" f"Batch={context['batch_size']}, " f"PromptLen={context['prompt_len']}, " - f"OutputLen={context['output_len']}," - f"NumGpus={context['tensor_parallel_size']}" - f"{', Sparsity ' + sparsity if sparsity else ''}") + f"NumGpus={context['engine_args']['tensor_parallel_size']}" + f"{', Sparsity ' + sparsity if sparsity else ''}\n" + f"Run Type: {run_type}") profile_json = None with open(json_trace) as f: diff --git a/vllm/profiler/layerwise_profile.py b/vllm/profiler/layerwise_profile.py index 9d9f427e807f6..33babfebdca1e 100644 --- a/vllm/profiler/layerwise_profile.py +++ b/vllm/profiler/layerwise_profile.py @@ -72,6 +72,9 @@ class LayerwiseProfileResults(profile): _model_stats_tree: List[_StatsTreeNode] = field(init=False) _summary_stats_tree: List[_StatsTreeNode] = field(init=False) + # profile metadata + num_running_seqs: 
Optional[int] = None + def __post_init__(self): self._build_correlation_map() self._build_module_tree() @@ -127,6 +130,9 @@ def export_summary_stats_table_csv(self, filename: str): def convert_stats_to_dict(self) -> str: return { + "metadata": { + "num_running_seqs": self.num_running_seqs + }, "summary_stats": self._convert_stats_tree_to_dict(self._summary_stats_tree), "model_stats": @@ -338,7 +344,15 @@ def df_traversal(node: _StatsTreeNode, curr_json_list: List[Dict]): class layerwise_profile(profile): - def __init__(self): + def __init__(self, num_running_seqs: Optional[int] = None): + """ + layerwise profile constructor. + + Args: + num_running_seqs (Optional[int], optional): When given, + num_running_seqs will be passed to LayerProfileResults for metadata + update. Defaults to None. + """ super().__init__( activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True, @@ -346,9 +360,13 @@ def __init__(self): with_modules=True, experimental_config=_ExperimentalConfig(verbose=True)) + self.num_running_seqs = num_running_seqs + def __enter__(self): return super().__enter__() def __exit__(self, exc_type, exc_val, exc_tb): super().__exit__(exc_type, exc_val, exc_tb) - self.results = LayerwiseProfileResults(self.profiler.kineto_results) + self.results = LayerwiseProfileResults( + self.profiler.kineto_results, + num_running_seqs=self.num_running_seqs) From 551603feffd9b4ba98ccdd34e02e403e04db88c1 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Mon, 16 Dec 2024 13:32:25 -0800 Subject: [PATCH 16/72] [core] overhaul memory profiling and fix backward compatibility (#10511) Signed-off-by: youkaichao --- tests/entrypoints/llm/test_gpu_utilization.py | 25 ++++ tests/entrypoints/llm/test_lazy_outlines.py | 2 +- tests/test_utils.py | 44 +++++- tests/worker/test_profile.py | 18 +-- vllm/engine/arg_utils.py | 11 +- vllm/utils.py | 125 +++++++++++++++++- vllm/worker/multi_step_model_runner.py | 3 +- vllm/worker/worker.py | 68 ++++------ 8 files changed, 236 insertions(+), 60 deletions(-) create mode 100644 tests/entrypoints/llm/test_gpu_utilization.py diff --git a/tests/entrypoints/llm/test_gpu_utilization.py b/tests/entrypoints/llm/test_gpu_utilization.py new file mode 100644 index 0000000000000..c2dab300ecefb --- /dev/null +++ b/tests/entrypoints/llm/test_gpu_utilization.py @@ -0,0 +1,25 @@ +from vllm import LLM, SamplingParams + + +def test_gpu_memory_utilization(): + prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] + sampling_params = SamplingParams(temperature=0.8, top_p=0.95) + + # makes sure gpu_memory_utilization is per-instance limit, + # not a global limit + llms = [ + LLM(model="facebook/opt-125m", + gpu_memory_utilization=0.3, + enforce_eager=True) for i in range(3) + ] + for llm in llms: + outputs = llm.generate(prompts, sampling_params) + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") diff --git a/tests/entrypoints/llm/test_lazy_outlines.py b/tests/entrypoints/llm/test_lazy_outlines.py index 2c53676c5f5dd..bf609b38a94f5 100644 --- a/tests/entrypoints/llm/test_lazy_outlines.py +++ b/tests/entrypoints/llm/test_lazy_outlines.py @@ -36,7 +36,7 @@ def run_lmfe(sample_regex): llm = LLM(model="facebook/opt-125m", enforce_eager=True, guided_decoding_backend="lm-format-enforcer", - gpu_memory_utilization=0.6) + gpu_memory_utilization=0.3) sampling_params = SamplingParams(temperature=0.8, 
top_p=0.95) outputs = llm.generate( prompts=[ diff --git a/tests/test_utils.py b/tests/test_utils.py index a731b11eae81c..0bc9e5bc32a46 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,11 +5,13 @@ from typing import AsyncIterator, Tuple import pytest +import torch from vllm.utils import (FlexibleArgumentParser, StoreBoolean, deprecate_kwargs, - get_open_port, merge_async_iterators, supports_kw) + get_open_port, memory_profiling, merge_async_iterators, + supports_kw) -from .utils import error_on_warning +from .utils import error_on_warning, fork_new_process_for_each_test @pytest.mark.asyncio @@ -270,3 +272,41 @@ def test_supports_kw(callable,kw_name,requires_kw_only, requires_kw_only=requires_kw_only, allow_var_kwargs=allow_var_kwargs ) == is_supported + + +@fork_new_process_for_each_test +def test_memory_profiling(): + # Fake out some model loading + inference memory usage to test profiling + # Memory used by other processes will show up as cuda usage outside of torch + from vllm.distributed.device_communicators.cuda_wrapper import ( + CudaRTLibrary) + lib = CudaRTLibrary() + # 512 MiB allocation outside of this instance + handle1 = lib.cudaMalloc(512 * 1024 * 1024) + + baseline_memory_in_bytes = \ + torch.cuda.mem_get_info()[1] - torch.cuda.mem_get_info()[0] + + # load weights + + weights = torch.randn(128, 1024, 1024, device='cuda', dtype=torch.float32) + + weights_memory_in_bytes = 128 * 1024 * 1024 * 4 # 512 MiB + + with memory_profiling(baseline_memory_in_bytes=baseline_memory_in_bytes, + weights_memory_in_bytes=weights_memory_in_bytes) as result: + # make a memory spike, 1 GiB + spike = torch.randn(256, 1024, 1024, device='cuda', dtype=torch.float32) + del spike + + # Add some extra non-torch memory 256 MiB (simulate NCCL) + handle2 = lib.cudaMalloc(256 * 1024 * 1024) + + # Check that the memory usage is within 5% of the expected values + non_torch_ratio = result.non_torch_increase_in_bytes / (256 * 1024 * 1024) # noqa + torch_peak_ratio = result.torch_peak_increase_in_bytes / (1024 * 1024 * 1024) # noqa + assert abs(non_torch_ratio - 1) <= 0.05 + assert abs(torch_peak_ratio - 1) <= 0.05 + del weights + lib.cudaFree(handle1) + lib.cudaFree(handle2) diff --git a/tests/worker/test_profile.py b/tests/worker/test_profile.py index 194ea2aa506f4..79233c75714de 100644 --- a/tests/worker/test_profile.py +++ b/tests/worker/test_profile.py @@ -31,10 +31,6 @@ def test_gpu_memory_profiling(): is_driver_worker=True, ) - # Load the model so we can profile it - worker.init_device() - worker.load_model() - # Set 10GiB as the total gpu ram to be device-agnostic def mock_mem_info(): current_usage = torch.cuda.memory_stats( @@ -46,20 +42,24 @@ def mock_mem_info(): from unittest.mock import patch with patch("torch.cuda.mem_get_info", side_effect=mock_mem_info): + # Load the model so we can profile it + worker.init_device() + worker.load_model() gpu_blocks, _ = worker.determine_num_available_blocks() - # Peak vram usage by torch should be 0.7077 GiB + # Peak vram usage by torch should be 0.47 GiB + # Model weights take 0.25 GiB # No memory should be allocated outside of torch # 9.0 GiB should be the utilization target - # 8.2923 GiB should be available for the KV cache + # 8.28 GiB should be available for the KV cache block_size = CacheEngine.get_cache_block_size( engine_config.cache_config, engine_config.model_config, engine_config.parallel_config) - expected_blocks = (8.2923 * 1024**3) // block_size + expected_blocks = (8.28 * 1024**3) // block_size # Check within a small tolerance for 
portability # Hardware, kernel, or dependency changes could all affect memory # utilization. - # A 10 block tolerance here should be about 6MB of wiggle room. - assert abs(gpu_blocks - expected_blocks) < 10 + # A 100 block tolerance here should be about 60MB of wiggle room. + assert abs(gpu_blocks - expected_blocks) < 100 diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 0aa367a173b6c..06b8542779dc0 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -487,11 +487,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: help='The fraction of GPU memory to be used for the model ' 'executor, which can range from 0 to 1. For example, a value of ' '0.5 would imply 50%% GPU memory utilization. If unspecified, ' - 'will use the default value of 0.9. This is a global gpu memory ' - 'utilization limit, for example if 50%% of the gpu memory is ' - 'already used before vLLM starts and --gpu-memory-utilization is ' - 'set to 0.9, then only 40%% of the gpu memory will be allocated ' - 'to the model executor.') + 'will use the default value of 0.9. This is a per-instance ' + 'limit, and only applies to the current vLLM instance.' + 'It does not matter if you have another vLLM instance running ' + 'on the same GPU. For example, if you have two vLLM instances ' + 'running on the same GPU, you can set the GPU memory utilization ' + 'to 0.5 for each instance.') parser.add_argument( '--num-gpu-blocks-override', type=int, diff --git a/vllm/utils.py b/vllm/utils.py index 45e682ac15782..73d2ae25f15ca 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -23,10 +23,12 @@ from asyncio import FIRST_COMPLETED, AbstractEventLoop, Future, Task from collections import UserDict, defaultdict from collections.abc import Iterable, Mapping +from dataclasses import dataclass, field from functools import lru_cache, partial, wraps from typing import (TYPE_CHECKING, Any, AsyncGenerator, Awaitable, Callable, - Dict, Generic, Hashable, List, Literal, Optional, - OrderedDict, Set, Tuple, Type, TypeVar, Union, overload) + Dict, Generator, Generic, Hashable, List, Literal, + Optional, OrderedDict, Set, Tuple, Type, TypeVar, Union, + overload) from uuid import uuid4 import numpy as np @@ -1664,3 +1666,122 @@ def kill_process_tree(pid: int): # Finally kill the parent with contextlib.suppress(ProcessLookupError): os.kill(pid, signal.SIGKILL) + + +@dataclass +class MemorySnapshot: + """Memory snapshot.""" + torch_peak_in_bytes: int = 0 + torch_memory_in_bytes: int = 0 + timestamp: float = 0.0 + + def measure(self): + self.torch_peak_in_bytes = torch.cuda.memory_stats( + )["allocated_bytes.all.peak"] + self.torch_memory_in_bytes = torch.cuda.memory_stats( + )["allocated_bytes.all.current"] + self.timestamp = time.time() + + def __sub__(self, other: "MemorySnapshot") -> "MemorySnapshot": + """support a - b""" + return MemorySnapshot( + torch_peak_in_bytes=self.torch_peak_in_bytes - + other.torch_peak_in_bytes, + torch_memory_in_bytes=self.torch_memory_in_bytes - + other.torch_memory_in_bytes, + timestamp=self.timestamp - other.timestamp) + + +@dataclass +class MemoryProfilingResult: + """Memory profiling result. 
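    The fields are tied together by the identity evaluated at the end of the
    memory_profiling context manager below (all values in bytes):

        non_kv_cache_memory_in_bytes = weights_memory_in_bytes
                                       + torch_peak_increase_in_bytes
                                       + non_torch_increase_in_bytes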
+ """ # noqa + baseline_memory_in_bytes: int = 0 + non_kv_cache_memory_in_bytes: int = 0 + torch_peak_increase_in_bytes: int = 0 + non_torch_increase_in_bytes: int = 0 + weights_memory_in_bytes: float = 0 + before_profile: MemorySnapshot = field(default_factory=MemorySnapshot) + after_profile: MemorySnapshot = field(default_factory=MemorySnapshot) + profile_time: float = 0.0 + + +@contextlib.contextmanager +def memory_profiling( + baseline_memory_in_bytes: int, weights_memory_in_bytes: int +) -> Generator[MemoryProfilingResult, None, None]: + """Memory profiling context manager. + baseline_memory_in_bytes: memory used by all the components other than + the current vLLM instance. It contains: memory used by other processes, memory + used by another vLLM instance in the same process, etc. It is usually measured + before the current vLLM instance initialize the device. And we assume it is + constant during the profiling of the current vLLM instance. + weights_memory_in_bytes: memory used by PyTorch when loading the model weights. + Note that, before loading the model weights, we also initialize the device + and distributed environment, which may consume some memory. This part is not + included in the weights_memory_in_bytes because PyTorch does not control it. + + The memory in one GPU can be classified into 3 categories: + 1. memory used by anything other than the current vLLM instance. + 2. memory used by torch in the current vLLM instance. + 3. memory used in the current vLLM instance, but not by torch. + + A quantitive example: + + Before creating the current vLLM instance: + category 1: 1 GiB + category 2: 0 GiB + category 3: 0 GiB + + After creating the current vLLM instance and loading the model, + (i.e. before profiling): + category 1: 1 GiB + category 2: 2 GiB (model weights take 2 GiB) + category 3: 0.5 GiB (memory used by NCCL) + + During profiling (peak): + category 1: 1 GiB + category 2: 4 GiB (peak activation tensors take 2 GiB) + category 3: 1 GiB (memory used by NCCL + buffers for some attention backends) + + After profiling: + category 1: 1 GiB + category 2: 3 GiB (after garbage-collecting activation tensors) + category 3: 1 GiB (memory used by NCCL + buffers for some attention backends) + + In this case, non-kv cache takes 5 GiB in total, including: + a. 2 GiB used by the model weights (category 2) + b. 2 GiB reserved for the peak activation tensors (category 2) + c. 1 GiB used by non-torch components (category 3) + + The memory used for loading weights (a.) is directly given from the argument `weights_memory_in_bytes`. + + The increase of ``torch.cuda.memory_stats()["allocated_bytes.all.peak"]` after profiling gives (b.). + + (c.) is tricky. We measure the total memory used in this GPU (`torch.cuda.mem_get_info()[1] - torch.cuda.mem_get_info()[0]`), + subtract the baseline memory, the memory used by the model weights, and diff of `torch.cuda.memory_stats()["allocated_bytes.all.current"]`. 
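    To make the arithmetic concrete, continue the example above on an assumed
    24 GiB GPU with gpu_memory_utilization=0.9 (both numbers are hypothetical):

        memory for this instance = 24 GiB * 0.9       = 21.6 GiB
        non-KV-cache memory      = 2 + 2 + 1          =  5.0 GiB
        available KV cache       = 21.6 GiB - 5.0 GiB = 16.6 GiB

    which mirrors how the worker turns this result into a KV cache budget.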
+ """ # noqa + torch.cuda.reset_peak_memory_stats() + + result = MemoryProfilingResult() + + result.baseline_memory_in_bytes = baseline_memory_in_bytes + # the part of memory used for holding the model weights + result.weights_memory_in_bytes = weights_memory_in_bytes + + result.before_profile.measure() + + yield result + + gc.collect() + torch.cuda.empty_cache() + + result.after_profile.measure() + + diff = result.after_profile - result.before_profile + result.torch_peak_increase_in_bytes = diff.torch_peak_in_bytes + current_cuda_memory_bytes = torch.cuda.mem_get_info( + )[1] - torch.cuda.mem_get_info()[0] + result.non_torch_increase_in_bytes = current_cuda_memory_bytes - baseline_memory_in_bytes - weights_memory_in_bytes - diff.torch_memory_in_bytes # noqa + result.profile_time = diff.timestamp + result.non_kv_cache_memory_in_bytes = result.non_torch_increase_in_bytes + result.torch_peak_increase_in_bytes + result.weights_memory_in_bytes # noqa diff --git a/vllm/worker/multi_step_model_runner.py b/vllm/worker/multi_step_model_runner.py index e08a61e31fe42..18b03bf1bfb56 100644 --- a/vllm/worker/multi_step_model_runner.py +++ b/vllm/worker/multi_step_model_runner.py @@ -645,7 +645,8 @@ def _advance_step(self, model_input: StatefulModelInput, return model_input def load_model(self) -> None: - return self._base_model_runner.load_model() + self._base_model_runner.load_model() + self.model_memory_usage = self._base_model_runner.model_memory_usage def save_sharded_state( self, diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py index a368bb9ee9a5b..f51b51d433d3d 100644 --- a/vllm/worker/worker.py +++ b/vllm/worker/worker.py @@ -1,7 +1,6 @@ """A GPU worker class.""" import gc import os -import time from typing import Dict, List, Optional, Set, Tuple, Type, Union import torch @@ -22,6 +21,7 @@ from vllm.prompt_adapter.request import PromptAdapterRequest from vllm.sequence import (ExecuteModelRequest, IntermediateTensors, SequenceGroupMetadata, SequenceGroupMetadataDelta) +from vllm.utils import GiB_bytes, memory_profiling from vllm.worker.cache_engine import CacheEngine from vllm.worker.enc_dec_model_runner import EncoderDecoderModelRunner from vllm.worker.model_runner import GPUModelRunnerBase, ModelRunner @@ -192,33 +192,22 @@ def determine_num_available_blocks(self) -> Tuple[int, int]: torch.cuda.reset_peak_memory_stats() free_memory_pre_profile, total_gpu_memory = torch.cuda.mem_get_info() - start_time = time.time() # Execute a forward pass with dummy inputs to profile the memory usage # of the model. - self.model_runner.profile_run() - torch.cuda.synchronize() + with memory_profiling(baseline_memory_in_bytes=total_gpu_memory - + self.init_gpu_memory, + weights_memory_in_bytes=self.model_runner. + model_memory_usage) as result: + self.model_runner.profile_run() + torch.cuda.synchronize() self._assert_memory_footprint_increased_during_profiling() - # Get the peak memory allocation recorded by torch - peak_memory = torch.cuda.memory_stats()["allocated_bytes.all.peak"] - - # Check for any memory left around that may have been allocated on the - # gpu outside of `torch`. 
NCCL operations, for example, can use a few - # GB during a forward pass - torch.cuda.empty_cache() - torch_allocated_bytes = torch.cuda.memory_stats( - )["allocated_bytes.all.current"] - total_allocated_bytes = torch.cuda.mem_get_info( - )[1] - torch.cuda.mem_get_info()[0] - non_torch_allocations = total_allocated_bytes - torch_allocated_bytes - if non_torch_allocations > 0: - peak_memory += non_torch_allocations - - available_kv_cache_memory = ( - total_gpu_memory * self.cache_config.gpu_memory_utilization - - peak_memory) + memory_for_current_instance = total_gpu_memory * \ + self.cache_config.gpu_memory_utilization + available_kv_cache_memory = (memory_for_current_instance - + result.non_kv_cache_memory_in_bytes) # Calculate the number of blocks that can be allocated with the # profiled peak memory. @@ -233,24 +222,23 @@ def determine_num_available_blocks(self) -> Tuple[int, int]: num_gpu_blocks = max(num_gpu_blocks, 0) num_cpu_blocks = max(num_cpu_blocks, 0) - end_time = time.time() - logger.info( - "Memory profiling results: " - "duration=%.2f seconds, " - "total_gpu_memory=%.2fGiB, " - "initial_memory_usage=%.2fGiB, " - "peak_torch_memory=%.2fGiB, " - "memory_usage_post_profile=%.2fGiB, " - "non_torch_memory=%.2fGiB, " - "kv_cache_size=%.2fGiB, " - "gpu_memory_utilization=%.2f.", end_time - start_time, - total_gpu_memory / (1024**3), - (total_gpu_memory - free_memory_pre_profile) / (1024**3), - (peak_memory - non_torch_allocations) / (1024**3), - total_allocated_bytes / (1024**3), - non_torch_allocations / (1024**3), - available_kv_cache_memory / (1024**3), - self.cache_config.gpu_memory_utilization) + msg = (f"Memory profiling takes {result.profile_time:.2f} seconds\n" + "the current vLLM instance can use " + "total_gpu_memory " + f"({(total_gpu_memory / GiB_bytes):.2f}GiB)" + " x gpu_memory_utilization " + f"({self.cache_config.gpu_memory_utilization:.2f})" + f" = {(memory_for_current_instance / GiB_bytes):.2f}GiB\n" + "model weights take " + f"{(result.weights_memory_in_bytes / GiB_bytes):.2f}GiB;" + " non_torch_memory takes " + f"{(result.non_torch_increase_in_bytes / GiB_bytes):.2f}GiB;" + " PyTorch activation peak memory takes " + f"{(result.torch_peak_increase_in_bytes / GiB_bytes):.2f}GiB;" + " the rest of the memory reserved for KV Cache is " + f"{(available_kv_cache_memory / GiB_bytes):.2f}GiB.") + + logger.info(msg) # Final cleanup if self.model_runner.lora_manager: From 35ffa682b1cd3f47eb6cda586a16dab5c0401477 Mon Sep 17 00:00:00 2001 From: bk-TurbaAI Date: Mon, 16 Dec 2024 23:20:39 +0100 Subject: [PATCH 17/72] [Docs] hint to enable use of GPU performance counters in profiling tools for multi-node distributed serving (#11235) Co-authored-by: Michael Goin --- docs/source/serving/distributed_serving.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/serving/distributed_serving.rst b/docs/source/serving/distributed_serving.rst index 4d57206e53a05..b24ba53e59694 100644 --- a/docs/source/serving/distributed_serving.rst +++ b/docs/source/serving/distributed_serving.rst @@ -54,7 +54,7 @@ Multi-Node Inference and Serving If a single node does not have enough GPUs to hold the model, you can run the model using multiple nodes. It is important to make sure the execution environment is the same on all nodes, including the model path, the Python environment. The recommended way is to use docker images to ensure the same environment, and hide the heterogeneity of the host machines via mapping them into the same docker configuration. 
-The first step, is to start containers and organize them into a cluster. We have provided a helper `script `_ to start the cluster. +The first step, is to start containers and organize them into a cluster. We have provided a helper `script `_ to start the cluster. Please note, this script launches docker without administrative privileges that would be required to access GPU performance counters when running profiling and tracing tools. For that purpose, the script can have ``CAP_SYS_ADMIN`` to the docker container by using the ``--cap-add`` option in the docker run command. Pick a node as the head node, and run the following command: From c301616ed23fef433db1a49df332b9d61d3178ad Mon Sep 17 00:00:00 2001 From: youkaichao Date: Mon, 16 Dec 2024 15:53:18 -0800 Subject: [PATCH 18/72] [ci][tests] add gh200 tests (#11244) Signed-off-by: youkaichao --- .buildkite/run-gh200-test.sh | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .buildkite/run-gh200-test.sh diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh new file mode 100644 index 0000000000000..d25510c47fe6b --- /dev/null +++ b/.buildkite/run-gh200-test.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# This script build the GH200 docker image and run the offline inference inside the container. +# It serves a sanity check for compilation and basic model usage. +set -ex + +# Try building the docker image +DOCKER_BUILDKIT=1 docker build . \ + --target test \ + -platform "linux/arm64" \ + -t gh200-test \ + --build-arg max_jobs=66 \ + --build-arg nvcc_threads=2 \ + --build-arg torch_cuda_arch_list="9.0+PTX" \ + --build-arg vllm_fa_cmake_gpu_arches="90-real" + +# Setup cleanup +remove_docker_container() { docker rm -f gh200-test || true; } +trap remove_docker_container EXIT +remove_docker_container + +# Run the image and test offline inference +docker run --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c ' + python3 examples/offline_inference.py +' From 88a412ed3d964de3443c42a6a35108115ee0ad25 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Mon, 16 Dec 2024 16:15:22 -0800 Subject: [PATCH 19/72] [torch.compile] fast inductor (#11108) Signed-off-by: youkaichao Co-authored-by: Tyler Michael Smith --- vllm/compilation/backends.py | 213 +++++++++++++++++- vllm/config.py | 415 ++++++++++++++++++++++++++++++++++- vllm/envs.py | 3 + 3 files changed, 624 insertions(+), 7 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 4a5dc337d01b8..0c7bbfe599b02 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -1,6 +1,10 @@ +import ast import copy import dataclasses +import os +import pprint import time +from collections import defaultdict from contextlib import ExitStack from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple from unittest.mock import patch @@ -21,6 +25,122 @@ logger = init_logger(__name__) +class InductorHashCache: + """ + Disk format: a Python list of tuples, each tuple is + (runtime_shape, graph_index, hash_str) + We use list of tuple for readability. + + In-memory format: a defaultdict of dict, where the key is + runtime_shape, and the value is a dict of graph_index to hash_str. + + The data is essentially `Dict[Optional[int], Dict[int, str]]`, + we don't use json here because json doesn't support int as key. + + TODO: better off-the-shelf solution to serialize the data? 
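    For illustration (the hash strings below are placeholders, not real
    Inductor hashes), a cache holding one compiled graph for the general
    shape and one for runtime shape 16 is written to disk as

        [(None, 0, 'hash_general_0'), (16, 0, 'hash_bs16_0')]

    and held in memory as

        {None: {0: 'hash_general_0'}, 16: {0: 'hash_bs16_0'}}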
+ """ + + def __init__(self, cache_dir: str, disabled: bool = False): + self.cache: defaultdict = defaultdict(dict) + self.disabled = disabled + self.cache_dir = cache_dir + self.cache_file_path = os.path.join(cache_dir, + "inductor_hash_cache.py") + if disabled: + return + # set flags so that Inductor and Triton store their cache + # in the cache_dir, then users only need to copy the cache_dir + # to another machine to reuse the cache. + inductor_cache = os.path.join(cache_dir, "inductor_cache") + os.makedirs(inductor_cache, exist_ok=True) + os.environ["TORCHINDUCTOR_CACHE_DIR"] = inductor_cache + triton_cache = os.path.join(cache_dir, "triton_cache") + os.makedirs(triton_cache, exist_ok=True) + os.environ["TRITON_CACHE_DIR"] = triton_cache + if os.path.exists(self.cache_file_path): + with open(self.cache_file_path) as f: + self.deserialize(f.read()) + + def deserialize(self, data: str): + # we use ast.literal_eval to parse the data + # because it is a safe way to parse Python literals. + # do not use eval(), it is unsafe. + list_data = ast.literal_eval(data) + for runtime_shape, graph_index, hash_str in list_data: + self.cache[runtime_shape][graph_index] = hash_str + + def serialize(self) -> str: + data = [] + for runtime_shape, graph_index_to_hash_str in self.cache.items(): + for graph_index, hash_str in graph_index_to_hash_str.items(): + data.append((runtime_shape, graph_index, hash_str)) + printer = pprint.PrettyPrinter(indent=4) + return printer.pformat(data) + + def save_to_file(self): + if self.disabled: + return + with open(self.cache_file_path, "w") as f: + f.write(self.serialize()) + + def __contains__(self, key: Tuple[Optional[int], int]) -> bool: + if self.disabled: + return False + runtime_shape, graph_index = key + return runtime_shape in self.cache and graph_index in self.cache[ + runtime_shape] + + def __getitem__(self, key: Tuple[Optional[int], int]) -> str: + if self.disabled: + raise KeyError("cannot read from disabled cache") + runtime_shape, graph_index = key + return self.cache[runtime_shape][graph_index] + + def __setitem__(self, key: Tuple[Optional[int], int], value: str): + # setitem for disabled cache is fine, because we + # don't actually write to the disk + runtime_shape, graph_index = key + self.cache[runtime_shape][graph_index] = value + + +class AlwaysHitShapeEnv: + """ + Why do we need this class: + + For normal `torch.compile` usage, every compilation will have + one Dynamo bytecode compilation and one Inductor compilation. + The Inductor compilation happens under the context of the + Dynamo bytecode compilation, and that context is used to + determine the dynamic shape information, etc. + + For our use case, we only run Dynamo bytecode compilation once, + and run Inductor compilation multiple times with different shapes + plus a general shape. The compilation for specific shapes happens + outside of the context of the Dynamo bytecode compilation. At that + time, we don't have shape environment to provide to Inductor, and + it will fail the Inductor code cache lookup. + + By providing a dummy shape environment that always hits, we can + make the Inductor code cache lookup always hit, and we can + compile the graph for different shapes as needed. + + The following dummy methods are obtained by trial-and-error + until it works. 
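+
+    A minimal sketch of how it is wired in (mirroring the cache-hit
+    path in wrap_inductor below):
+
+        with patch("torch._inductor.codecache.FxGraphCache._get_shape_env",
+                   lambda *args, **kwargs: AlwaysHitShapeEnv()):
+            inductor_compiled_graph = FxGraphCache._lookup_graph(
+                hash_str, example_inputs, True, False)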
+ """ + + def __init__(self) -> None: + self.guards: List[Any] = [] + + def evaluate_guards_expression(self, *args, **kwargs): + return True + + def get_pruned_guards(self, *args, **kwargs): + return [] + + def produce_guards_expression(self, *args, **kwargs): + return "" + + def wrap_inductor(graph, example_inputs, additional_inductor_config, @@ -55,9 +175,93 @@ def wrap_inductor(graph, # inductor can inplace modify the graph, so we need to copy it # see https://github.com/pytorch/pytorch/issues/138980 graph = copy.deepcopy(graph) - compiled_graph = compile_fx(graph, - example_inputs, - config_patches=current_config) + + cache_data = compilation_config.inductor_hash_cache + if (runtime_shape, graph_index) in cache_data: + # we compiled this graph before + # so we can directly lookup the compiled graph via hash + hash_str = cache_data[(runtime_shape, graph_index)] + if graph_index == 0: + # adds some info logging for the first graph + logger.info( + "Directly lookup the graph for shape %s from the cache", + str(runtime_shape)) # noqa + logger.debug( + "directly lookup the %s-th graph for shape %s via hash %s", + graph_index, str(runtime_shape), hash_str) + from torch._inductor.codecache import FxGraphCache + with patch("torch._inductor.codecache.FxGraphCache._get_shape_env", + lambda *args, **kwargs: AlwaysHitShapeEnv()): + inductor_compiled_graph = FxGraphCache._lookup_graph( + hash_str, example_inputs, True, False) + assert inductor_compiled_graph is not None, ( + "Inductor cache lookup failed. Please remove" + f"the cache file {compilation_config.inductor_hash_cache.cache_file_path} and try again." # noqa + ) + + # Inductor calling convention (function signature): + # f(list) -> tuple + # Dynamo calling convention (function signature): + # f(*args) -> Any + + # need to know if the graph returns a tuple + from torch._inductor.compile_fx import graph_returns_tuple + returns_tuple = graph_returns_tuple(graph) + + # this is the graph we return to Dynamo to run + def compiled_graph(*args): + # convert args to list + list_args = list(args) + graph_output = inductor_compiled_graph(list_args) + # unpack the tuple if needed + if returns_tuple: + return graph_output + else: + return graph_output[0] + else: + # it's the first time we compile this graph + # the assumption is that we don't have nested Inductor compilation. + # compiled_fx_graph_hash will only be called once, and we can hook + # it to get the hash of the compiled graph directly. + from torch._inductor.codecache import compiled_fx_graph_hash + + def hijack_compiled_fx_graph_hash(*args, **kwargs): + out = compiled_fx_graph_hash(*args, **kwargs) + # store the hash in the cache + nonlocal cache_data + cache_data[(runtime_shape, graph_index)] = out[0] + if graph_index == 0: + # adds some info logging for the first graph + logger.info("Cache the graph of shape %s for later use", + str(runtime_shape)) + logger.debug("store the %s-th graph for shape %s via hash %s", + graph_index, str(runtime_shape), out[0]) + return out + + def _check_can_cache(*args, **kwargs): + # no error means it can be cached. + # Inductor refuses to cache the graph outside of Dynamo + # tracing context, and also disables caching for graphs + # with high-order ops. + # For vLLM, in either case, we want to cache the graph. 
+ # see https://github.com/pytorch/pytorch/blob/9f5ebf3fc609105a74eab4ccc24932d6353ff566/torch/_inductor/codecache.py#L1221 # noqa + return + + def _get_shape_env(): + return AlwaysHitShapeEnv() + + with patch(# for hijacking the hash of the compiled graph + "torch._inductor.codecache.compiled_fx_graph_hash", + hijack_compiled_fx_graph_hash), \ + patch(# for providing a dummy shape environment + "torch._inductor.codecache.FxGraphCache._get_shape_env", + _get_shape_env), \ + patch(# for forcing the graph to be cached + "torch._inductor.codecache.FxGraphCache._check_can_cache", + _check_can_cache): + compiled_graph = compile_fx(graph, + example_inputs, + config_patches=current_config) # after compiling the last graph, record the end time if graph_index == num_graphs - 1: @@ -457,6 +661,9 @@ def __call__(self, *args) -> Any: # finished compilations for all required shapes if self.is_last_graph and not self.to_be_compiled_sizes: + + # save the hash of the inductor graph for the next run + self.compilation_config.inductor_hash_cache.save_to_file() end_monitoring_torch_compile(self.vllm_config) if not entry.use_cudagraph: diff --git a/vllm/config.py b/vllm/config.py index fce8011be4015..9cfd08024ea7b 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -3,6 +3,7 @@ import enum import hashlib import json +import os import warnings from contextlib import contextmanager from dataclasses import dataclass, field, replace @@ -162,6 +163,30 @@ class ModelConfig: which allows no processors. """ + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + factors: List[Any] = [] + factors.append(self.model) + factors.append(self.dtype) + factors.append(self.quantization) + factors.append(self.quantization_param_path) + factors.append(self.revision) + factors.append(self.code_revision) + factors.append(self.trust_remote_code) + factors.append(self.rope_scaling) + factors.append(self.rope_theta) + return hashlib.sha256(str(factors).encode()).hexdigest() + def __init__(self, model: str, task: Union[TaskOption, Literal["draft"]], @@ -203,6 +228,8 @@ def __init__(self, self.seed = seed self.revision = revision self.code_revision = code_revision + self.rope_scaling = rope_scaling + self.rope_theta = rope_theta if hf_overrides is None: hf_overrides = {} @@ -832,6 +859,24 @@ class CacheConfig: cpu_offload_gb: Size of the CPU offload buffer in GiB. """ + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + factors: List[Any] = [] + factors.append(self.cache_dtype) + # `cpu_offload_gb` does not use `torch.compile` yet. 
+ hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + def __init__( self, block_size: int, @@ -928,6 +973,24 @@ class TokenizerPoolConfig: pool_type: Union[str, Type["BaseTokenizerGroup"]] extra_config: dict + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. + factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + def __post_init__(self): if self.pool_type not in ("ray", ) and not isinstance( self.pool_type, type): @@ -1010,6 +1073,24 @@ class LoadConfig: default_factory=dict) ignore_patterns: Optional[Union[List[str], str]] = None + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. + factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + def __post_init__(self): model_loader_extra_config = self.model_loader_extra_config or {} if isinstance(model_loader_extra_config, str): @@ -1073,6 +1154,19 @@ class ParallelConfig: rank: int = 0 + def compute_hash(self): + """ + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + factors: List[Any] = [] + factors.append(self.pipeline_parallel_size) + factors.append(self.tensor_parallel_size) + return hashlib.sha256(str(factors).encode()).hexdigest() + def __post_init__(self) -> None: self.world_size = self.pipeline_parallel_size * \ self.tensor_parallel_size @@ -1209,6 +1303,24 @@ class SchedulerConfig: chunked_prefill_enabled: bool = field(init=False) + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. 
+ factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + def __post_init__(self) -> None: if self.max_num_batched_tokens is None: if self.enable_chunked_prefill: @@ -1286,6 +1398,25 @@ class DeviceConfig: device: Optional[torch.device] device_type: str + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # the device/platform information will be summarized + # by torch/vllm automatically. + factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + def __init__(self, device: str = "auto") -> None: if device == "auto": # Automated device type detection @@ -1313,6 +1444,24 @@ class SpeculativeConfig: decoding with top-1 proposals. """ + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # spec decode does not use `torch.compile` yet. + factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + @staticmethod def maybe_create_spec_config( target_model_config: ModelConfig, @@ -1753,6 +1902,24 @@ class LoRAConfig: long_lora_scaling_factors: Optional[Tuple[float]] = None bias_enabled: bool = False + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # LoRA is not compatible with `torch.compile` . + factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + def __post_init__(self): # Setting the maximum rank to 256 should be able to satisfy the vast # majority of applications. @@ -1802,6 +1969,24 @@ class PromptAdapterConfig: max_cpu_prompt_adapters: Optional[int] = None prompt_adapter_dtype: Optional[torch.dtype] = None + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. 
+ factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + def __post_init__(self): if self.max_prompt_adapters < 1: @@ -1830,6 +2015,24 @@ class MultiModalConfig: for each :class:`~vllm.multimodal.MultiModalPlugin`. """ + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. + factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + # TODO: Add configs to init vision tower or not. @@ -1869,6 +2072,24 @@ class PoolerConfig: ``math-shepherd-mistral-7b-prm`` model. """ + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. + factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + @staticmethod def from_json(json_str: str) -> "PoolerConfig": return PoolerConfig(**json.loads(json_str)) @@ -2103,6 +2324,24 @@ class DecodingConfig: # 'outlines' / 'lm-format-enforcer' / 'xgrammar' guided_decoding_backend: str = 'xgrammar' + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. + factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + def __post_init__(self): valid_guided_backends = ['outlines', 'lm-format-enforcer', 'xgrammar'] backend = self.guided_decoding_backend @@ -2124,6 +2363,24 @@ class ObservabilityConfig: # If set, collects the model execute time for the request. collect_model_execute_time: bool = False + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. 
+ factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + def __post_init__(self): if not is_otel_available() and self.otlp_traces_endpoint is not None: raise ValueError( @@ -2165,6 +2422,24 @@ class KVTransferConfig(BaseModel): # The KV connector port, used to build distributed connection kv_port: int = 14579 + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. + factors: List[Any] = [] + hash_str = hashlib.md5(str(factors).encode()).hexdigest() + return hash_str + @classmethod def from_cli(cls, cli_value: str) -> "KVTransferConfig": """Parse the CLI value for the kv cache transfer config.""" @@ -2234,6 +2509,9 @@ class CompilationConfig(BaseModel): - 2: dynamo once. - 3: piecewise compilation. - debug_dump_path: the path to dump the debug information. + - cache_dir: the directory to store the compiled graph, to + accelerate Inductor compilation. By default, it will use + model-related information to generate a cache directory. - backend: the backend for compilation. It needs to be a string. - "" (empty string): use the default backend. - "eager"/"openxla"/...: use the specified backend registered in PyTorch. @@ -2302,12 +2580,10 @@ class CompilationConfig(BaseModel): """ # noqa level: int = 0 debug_dump_path: str = "" + cache_dir: str = "" backend: str = "" custom_ops: List[str] = Field(default_factory=list) - splitting_ops: List[str] = Field(default_factory=lambda: [ - "vllm.unified_attention", - "vllm.unified_attention_with_output", - ]) + splitting_ops: List[str] = Field(default=None) # type: ignore use_inductor: bool = True candidate_compile_sizes: Optional[List[int]] = Field(default=None) @@ -2371,12 +2647,37 @@ def model_post_init(self, __context: Any) -> None: enabled_custom_ops: Counter[str] = PrivateAttr disabled_custom_ops: Counter[str] = PrivateAttr compilation_time: float = PrivateAttr + # should be InductorHashCache, but Pydantic does not support it + inductor_hash_cache: Any = PrivateAttr # Per-model forward context # Mainly used to store attention cls # Map from layer name to the attention cls static_forward_context: Dict[str, Any] = PrivateAttr + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. 
+ """ + factors: List[Any] = [] + factors.append(self.level) + factors.append(self.backend) + factors.append(self.custom_ops) + factors.append(self.splitting_ops) + factors.append(self.use_inductor) + factors.append(self.inductor_compile_config) + factors.append(self.inductor_passes) + factors.append(self.pass_config.uuid()) + return hashlib.sha256(str(factors).encode()).hexdigest() + def __repr__(self) -> str: exclude = { "static_forward_context", @@ -2405,6 +2706,27 @@ def model_post_init(self, __context: Any) -> None: count_all = self.custom_ops.count("all") assert count_none + count_all <= 1, "Can only specify 'none' or 'all'" + if self.splitting_ops is None: + if envs.VLLM_USE_V1: + # v1 must split the graph on attention ops + # for piecewise cudagraph + self.splitting_ops = [ + "vllm.unified_attention", + "vllm.unified_attention_with_output", + ] + else: + # v0 can use full graph compilation without splitting, + # splitting is optional. + # right now we still need it. kv cache shape + # will be included in the graph if we don't split + # the graph. + # TODO: hide kv cache in static forward context + # so that inductor does not see it. + self.splitting_ops = [ + "vllm.unified_attention", + "vllm.unified_attention_with_output", + ] + for k, v in self.inductor_passes.items(): if not isinstance(v, str): assert callable(v), ( @@ -2444,6 +2766,30 @@ def init_backend(self, vllm_config: "VllmConfig") -> Union[str, Callable]: # TODO: pass user-specified backend to piecewise compilation # merge with the config use_inductor assert self.level == CompilationLevel.PIECEWISE + + if not self.cache_dir: + # no provided cache dir, generate one based on the known factors + # that affects the compilation. if none of the factors change, + # the cache dir will be the same so that we can reuse the compiled + # graph. + hash_key = vllm_config.compute_hash() + cache_dir = os.path.join( + envs.VLLM_CACHE_ROOT, "torch_compile_cache", hash_key, + f"rank_{vllm_config.parallel_config.rank}") + os.makedirs(cache_dir, exist_ok=True) + self.cache_dir = cache_dir + + disabled = envs.VLLM_DISABLE_COMPILE_CACHE + from vllm.compilation.backends import InductorHashCache + self.inductor_hash_cache: InductorHashCache = InductorHashCache( + self.cache_dir, disabled=disabled) + if disabled: + logger.info("vLLM's torch.compile cache is disabled.") + else: + logger.info( + "Using cache directory: %s for vLLM's torch.compile", + self.cache_dir) + from vllm.compilation.backends import VllmBackend return VllmBackend(vllm_config) @@ -2520,6 +2866,67 @@ class VllmConfig: init=True) # type: ignore instance_id: str = "" + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. 
+ """ + factors: List[Any] = [] + # summarize system state + from torch._inductor.codecache import CacheBase + system_factors = CacheBase.get_system() + factors.append(system_factors) + + # summarize pytorch state + from torch._inductor.codecache import torch_key + torch_factors = torch_key() + factors.append(torch_factors) + + # summarize vllm config + vllm_factors: List[Any] = [] + from vllm import __version__ + vllm_factors.append(__version__) + if self.model_config: + vllm_factors.append(self.model_config.compute_hash()) + if self.cache_config: + vllm_factors.append(self.cache_config.compute_hash()) + if self.parallel_config: + vllm_factors.append(self.parallel_config.compute_hash()) + if self.scheduler_config: + vllm_factors.append(self.scheduler_config.compute_hash()) + if self.device_config: + vllm_factors.append(self.device_config.compute_hash()) + if self.load_config: + vllm_factors.append(self.load_config.compute_hash()) + if self.lora_config: + vllm_factors.append(self.lora_config.compute_hash()) + if self.speculative_config: + vllm_factors.append(self.speculative_config.compute_hash()) + if self.decoding_config: + vllm_factors.append(self.decoding_config.compute_hash()) + if self.observability_config: + vllm_factors.append(self.observability_config.compute_hash()) + if self.prompt_adapter_config: + vllm_factors.append(self.prompt_adapter_config.compute_hash()) + if self.quant_config: + pass # should be captured by model_config.quantization + if self.compilation_config: + vllm_factors.append(self.compilation_config.compute_hash()) + if self.kv_transfer_config: + vllm_factors.append(self.kv_transfer_config.compute_hash()) + + factors.append(vllm_factors) + + hash_str = hashlib.md5(str(factors).encode()).hexdigest()[:10] + return hash_str + def pad_for_cudagraph(self, batch_size: int) -> int: # if batch_size > self.compilation_config.max_capture_size, # it should raise an IndexError. diff --git a/vllm/envs.py b/vllm/envs.py index da17b747ea215..18870c1c6b51a 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -71,6 +71,7 @@ VLLM_USE_V1: bool = False VLLM_ENABLE_V1_MULTIPROCESSING: bool = True VLLM_LOG_BATCHSIZE_INTERVAL: float = -1 + VLLM_DISABLE_COMPILE_CACHE: bool = False def get_default_cache_root(): @@ -463,6 +464,8 @@ def get_default_config_root(): lambda: bool(int(os.getenv("VLLM_ENABLE_V1_MULTIPROCESSING", "1"))), "VLLM_LOG_BATCHSIZE_INTERVAL": lambda: float(os.getenv("VLLM_LOG_BATCHSIZE_INTERVAL", "-1")), + "VLLM_DISABLE_COMPILE_CACHE": + lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))), } # end-env-vars-definition From 35bae114a89e03e3dc6a6d2f758378e58938bffa Mon Sep 17 00:00:00 2001 From: youkaichao Date: Mon, 16 Dec 2024 17:22:38 -0800 Subject: [PATCH 20/72] fix gh200 tests on main (#11246) Signed-off-by: youkaichao --- .buildkite/run-gh200-test.sh | 4 ++-- docs/source/serving/deploying_with_docker.rst | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh index d25510c47fe6b..d06604f96f2b8 100644 --- a/.buildkite/run-gh200-test.sh +++ b/.buildkite/run-gh200-test.sh @@ -6,8 +6,8 @@ set -ex # Try building the docker image DOCKER_BUILDKIT=1 docker build . 
\ - --target test \ - -platform "linux/arm64" \ + --target vllm-openai \ + --platform "linux/arm64" \ -t gh200-test \ --build-arg max_jobs=66 \ --build-arg nvcc_threads=2 \ diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst index 11a9f12fd17cd..56f0020a1011a 100644 --- a/docs/source/serving/deploying_with_docker.rst +++ b/docs/source/serving/deploying_with_docker.rst @@ -54,16 +54,13 @@ of PyTorch Nightly and should be considered **experimental**. Using the flag `-- # Example of building on Nvidia GH200 server. (Memory usage: ~12GB, Build time: ~1475s / ~25 min, Image size: 7.26GB) $ DOCKER_BUILDKIT=1 sudo docker build . \ --target vllm-openai \ - -platform "linux/arm64" \ + --platform "linux/arm64" \ -t vllm/vllm-gh200-openai:latest \ --build-arg max_jobs=66 \ --build-arg nvcc_threads=2 \ --build-arg torch_cuda_arch_list="9.0+PTX" \ --build-arg vllm_fa_cmake_gpu_arches="90-real" - - - To run vLLM: .. code-block:: console From 0064f697d318a2ce38342f7c20754cf229311b8b Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Mon, 16 Dec 2024 22:39:58 -0500 Subject: [PATCH 21/72] [CI] Add test case with JSON schema using references + use xgrammar by default with OpenAI parse (#10935) Signed-off-by: mgoin --- tests/entrypoints/conftest.py | 39 +++++++++++++++++++ tests/entrypoints/llm/test_guided_generate.py | 28 +++++++++++++ vllm/entrypoints/openai/protocol.py | 2 +- 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/tests/entrypoints/conftest.py b/tests/entrypoints/conftest.py index 0f7d15e1d85aa..ef74062ce4b41 100644 --- a/tests/entrypoints/conftest.py +++ b/tests/entrypoints/conftest.py @@ -100,6 +100,45 @@ def sample_complex_json_schema(): } +@pytest.fixture +def sample_definition_json_schema(): + return { + '$defs': { + 'Step': { + 'properties': { + 'explanation': { + 'title': 'Explanation', + 'type': 'string' + }, + 'output': { + 'title': 'Output', + 'type': 'string' + } + }, + 'required': ['explanation', 'output'], + 'title': 'Step', + 'type': 'object' + } + }, + 'properties': { + 'steps': { + 'items': { + '$ref': '#/$defs/Step' + }, + 'title': 'Steps', + 'type': 'array' + }, + 'final_answer': { + 'title': 'Final Answer', + 'type': 'string' + } + }, + 'required': ['steps', 'final_answer'], + 'title': 'MathReasoning', + 'type': 'object' + } + + @pytest.fixture def sample_guided_choice(): return [ diff --git a/tests/entrypoints/llm/test_guided_generate.py b/tests/entrypoints/llm/test_guided_generate.py index de6257cfc551c..ed50ec6bbc9eb 100644 --- a/tests/entrypoints/llm/test_guided_generate.py +++ b/tests/entrypoints/llm/test_guided_generate.py @@ -104,6 +104,34 @@ def test_guided_complex_json_completion(sample_complex_json_schema, llm): schema=sample_complex_json_schema) +@pytest.mark.skip_global_cleanup +def test_guided_definition_json_completion(sample_definition_json_schema, llm): + sampling_params = SamplingParams(temperature=1.0, + max_tokens=1000, + guided_decoding=GuidedDecodingParams( + json=sample_definition_json_schema)) + outputs = llm.generate(prompts=[ + f"Give an example JSON for solving 8x + 7 = -23 " + f"that fits this schema: {sample_definition_json_schema}" + ] * 2, + sampling_params=sampling_params, + use_tqdm=True) + + assert outputs is not None + + for output in outputs: + assert output is not None + assert isinstance(output, RequestOutput) + prompt = output.prompt + + generated_text = output.outputs[0].text + assert generated_text is not None + print(f"Prompt: {prompt!r}, Generated text: 
{generated_text!r}") + output_json = json.loads(generated_text) + jsonschema.validate(instance=output_json, + schema=sample_definition_json_schema) + + @pytest.mark.skip_global_cleanup def test_guided_choice_completion(sample_guided_choice, llm): sampling_params = SamplingParams( diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 6ed7c2e9dcd6b..5a70e0952666b 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -387,7 +387,7 @@ def to_sampling_params( assert json_schema is not None self.guided_json = json_schema.json_schema if self.guided_decoding_backend is None: - self.guided_decoding_backend = "lm-format-enforcer" + self.guided_decoding_backend = "xgrammar" guided_decoding = GuidedDecodingParams.from_optional( json=self._get_guided_json_from_tool() or self.guided_json, From 66d4b16724226e9f377551198cc7425c12ddafae Mon Sep 17 00:00:00 2001 From: kYLe Date: Tue, 17 Dec 2024 00:09:58 -0600 Subject: [PATCH 22/72] [Frontend] Add OpenAI API support for input_audio (#11027) Signed-off-by: DarkLight1337 Co-authored-by: DarkLight1337 --- .../serving/openai_compatible_server.md | 10 +- docs/source/usage/multimodal_inputs.rst | 90 ++++++++++++- ...i_chat_completion_client_for_multimodal.py | 34 ++++- tests/entrypoints/openai/test_audio.py | 125 +++++++++++++++++- vllm/entrypoints/chat_utils.py | 65 +++++++-- 5 files changed, 301 insertions(+), 23 deletions(-) diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md index 14a5b02d72aa5..1bc8d32d2d161 100644 --- a/docs/source/serving/openai_compatible_server.md +++ b/docs/source/serving/openai_compatible_server.md @@ -34,11 +34,6 @@ We currently support the following OpenAI APIs: - *Note: `suffix` parameter is not supported.* - [Chat Completions API](#chat-api) (`/v1/chat/completions`) - Only applicable to [text generation models](../models/generative_models.rst) (`--task generate`) with a [chat template](#chat-template). - - [Vision](https://platform.openai.com/docs/guides/vision)-related parameters are supported; see [Multimodal Inputs](../usage/multimodal_inputs.rst). - - *Note: `image_url.detail` parameter is not supported.* - - We also support `audio_url` content type for audio files. - - Refer to [vllm.entrypoints.chat_utils](https://github.com/vllm-project/vllm/tree/main/vllm/entrypoints/chat_utils.py) for the exact schema. - - *TODO: Support `input_audio` content type as defined [here](https://github.com/openai/openai-python/blob/v1.52.2/src/openai/types/chat/chat_completion_content_part_input_audio_param.py).* - *Note: `parallel_tool_calls` and `user` parameters are ignored.* - [Embeddings API](#embeddings-api) (`/v1/embeddings`) - Only applicable to [embedding models](../models/pooling_models.rst) (`--task embed`). @@ -209,6 +204,11 @@ The following extra parameters are supported: Refer to [OpenAI's API reference](https://platform.openai.com/docs/api-reference/chat) for more details. +We support both [Vision](https://platform.openai.com/docs/guides/vision)- and +[Audio](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in)-related parameters; +see our [Multimodal Inputs](../usage/multimodal_inputs.rst) guide for more information. +- *Note: `image_url.detail` parameter is not supported.* + #### Extra parameters The following [sampling parameters (click through to see documentation)](../dev/sampling_params.rst) are supported. 
diff --git a/docs/source/usage/multimodal_inputs.rst b/docs/source/usage/multimodal_inputs.rst index 1e00f26f9a3ba..680382e457cc5 100644 --- a/docs/source/usage/multimodal_inputs.rst +++ b/docs/source/usage/multimodal_inputs.rst @@ -315,7 +315,95 @@ You can use `these tests `_. +Here is a simple example using Ultravox-v0.3. + +First, launch the OpenAI-compatible server: + +.. code-block:: bash + + vllm serve fixie-ai/ultravox-v0_3 + +Then, you can use the OpenAI client as follows: + +.. code-block:: python + + import base64 + import requests + from openai import OpenAI + from vllm.assets.audio import AudioAsset + + def encode_base64_content_from_url(content_url: str) -> str: + """Encode a content retrieved from a remote url to base64 format.""" + + with requests.get(content_url) as response: + response.raise_for_status() + result = base64.b64encode(response.content).decode('utf-8') + + return result + + openai_api_key = "EMPTY" + openai_api_base = "http://localhost:8000/v1" + + client = OpenAI( + api_key=openai_api_key, + base_url=openai_api_base, + ) + + # Any format supported by librosa is supported + audio_url = AudioAsset("winning_call").url + audio_base64 = encode_base64_content_from_url(audio_url) + + chat_completion_from_base64 = client.chat.completions.create( + messages=[{ + "role": "user", + "content": [ + { + "type": "text", + "text": "What's in this audio?" + }, + { + "type": "input_audio", + "input_audio": { + "data": audio_base64, + "format": "wav" + }, + }, + ], + }], + model=model, + max_completion_tokens=64, + ) + + result = chat_completion_from_base64.choices[0].message.content + print("Chat completion output from input audio:", result) + +Alternatively, you can pass :code:`audio_url`, which is the audio counterpart of :code:`image_url` for image input: + +.. code-block:: python + + chat_completion_from_url = client.chat.completions.create( + messages=[{ + "role": "user", + "content": [ + { + "type": "text", + "text": "What's in this audio?" + }, + { + "type": "audio_url", + "audio_url": { + "url": audio_url + }, + }, + ], + }], + model=model, + max_completion_tokens=64, + ) + + result = chat_completion_from_url.choices[0].message.content + print("Chat completion output from audio url:", result) A full code example can be found in `examples/openai_chat_completion_client_for_multimodal.py `_. diff --git a/examples/openai_chat_completion_client_for_multimodal.py b/examples/openai_chat_completion_client_for_multimodal.py index 0ec4f71dddf93..6a160fd70423f 100644 --- a/examples/openai_chat_completion_client_for_multimodal.py +++ b/examples/openai_chat_completion_client_for_multimodal.py @@ -153,10 +153,37 @@ def run_multi_image() -> None: # Audio input inference def run_audio() -> None: - # Any format supported by librosa is supported audio_url = AudioAsset("winning_call").url + audio_base64 = encode_base64_content_from_url(audio_url) + + # OpenAI-compatible schema (`input_audio`) + chat_completion_from_base64 = client.chat.completions.create( + messages=[{ + "role": + "user", + "content": [ + { + "type": "text", + "text": "What's in this audio?" 
+ }, + { + "type": "input_audio", + "input_audio": { + # Any format supported by librosa is supported + "data": audio_base64, + "format": "wav" + }, + }, + ], + }], + model=model, + max_completion_tokens=64, + ) + + result = chat_completion_from_base64.choices[0].message.content + print("Chat completion output from input audio:", result) - # Use audio url in the payload + # HTTP URL chat_completion_from_url = client.chat.completions.create( messages=[{ "role": @@ -169,6 +196,7 @@ def run_audio() -> None: { "type": "audio_url", "audio_url": { + # Any format supported by librosa is supported "url": audio_url }, }, @@ -181,7 +209,7 @@ def run_audio() -> None: result = chat_completion_from_url.choices[0].message.content print("Chat completion output from audio url:", result) - audio_base64 = encode_base64_content_from_url(audio_url) + # base64 URL chat_completion_from_base64 = client.chat.completions.create( messages=[{ "role": diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py index b579dcbb5c402..0a29d77e73abc 100644 --- a/tests/entrypoints/openai/test_audio.py +++ b/tests/entrypoints/openai/test_audio.py @@ -155,6 +155,61 @@ async def test_single_chat_session_audio_base64encoded( assert message.content is not None and len(message.content) >= 0 +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("audio_url", TEST_AUDIO_URLS) +async def test_single_chat_session_input_audio( + client: openai.AsyncOpenAI, model_name: str, audio_url: str, + base64_encoded_audio: Dict[str, str]): + messages = [{ + "role": + "user", + "content": [ + { + "type": "input_audio", + "input_audio": { + "data": base64_encoded_audio[audio_url], + "format": "wav" + } + }, + { + "type": "text", + "text": "What's happening in this audio?" 
+ }, + ], + }] + + # test single completion + chat_completion = await client.chat.completions.create( + model=model_name, + messages=messages, + max_completion_tokens=10, + logprobs=True, + top_logprobs=5) + assert len(chat_completion.choices) == 1 + + choice = chat_completion.choices[0] + assert choice.finish_reason == "length" + assert chat_completion.usage == openai.types.CompletionUsage( + completion_tokens=10, prompt_tokens=202, total_tokens=212) + + message = choice.message + message = chat_completion.choices[0].message + assert message.content is not None and len(message.content) >= 10 + assert message.role == "assistant" + messages.append({"role": "assistant", "content": message.content}) + + # test multi-turn dialogue + messages.append({"role": "user", "content": "express your result in json"}) + chat_completion = await client.chat.completions.create( + model=model_name, + messages=messages, + max_completion_tokens=10, + ) + message = chat_completion.choices[0].message + assert message.content is not None and len(message.content) >= 0 + + @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("audio_url", TEST_AUDIO_URLS) @@ -212,11 +267,72 @@ async def test_chat_streaming_audio(client: openai.AsyncOpenAI, assert "".join(chunks) == output +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("audio_url", TEST_AUDIO_URLS) +async def test_chat_streaming_input_audio(client: openai.AsyncOpenAI, + model_name: str, audio_url: str, + base64_encoded_audio: Dict[str, + str]): + messages = [{ + "role": + "user", + "content": [ + { + "type": "input_audio", + "input_audio": { + "data": base64_encoded_audio[audio_url], + "format": "wav" + } + }, + { + "type": "text", + "text": "What's happening in this audio?" 
+ }, + ], + }] + + # test single completion + chat_completion = await client.chat.completions.create( + model=model_name, + messages=messages, + max_completion_tokens=10, + temperature=0.0, + ) + output = chat_completion.choices[0].message.content + stop_reason = chat_completion.choices[0].finish_reason + + # test streaming + stream = await client.chat.completions.create( + model=model_name, + messages=messages, + max_completion_tokens=10, + temperature=0.0, + stream=True, + ) + chunks: List[str] = [] + finish_reason_count = 0 + async for chunk in stream: + delta = chunk.choices[0].delta + if delta.role: + assert delta.role == "assistant" + if delta.content: + chunks.append(delta.content) + if chunk.choices[0].finish_reason is not None: + finish_reason_count += 1 + # finish reason should only return in last block + assert finish_reason_count == 1 + assert chunk.choices[0].finish_reason == stop_reason + assert delta.content + assert "".join(chunks) == output + + @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("audio_url", TEST_AUDIO_URLS) async def test_multi_audio_input(client: openai.AsyncOpenAI, model_name: str, - audio_url: str): + audio_url: str, + base64_encoded_audio: Dict[str, str]): messages = [{ "role": @@ -229,9 +345,10 @@ async def test_multi_audio_input(client: openai.AsyncOpenAI, model_name: str, } }, { - "type": "audio_url", - "audio_url": { - "url": audio_url + "type": "input_audio", + "input_audio": { + "data": base64_encoded_audio[audio_url], + "format": "wav" } }, { diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index aaa5cd759366a..3df08c740d65b 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -13,7 +13,8 @@ # yapf conflicts with isort for this block # yapf: disable from openai.types.chat import (ChatCompletionAssistantMessageParam, - ChatCompletionContentPartImageParam) + ChatCompletionContentPartImageParam, + ChatCompletionContentPartInputAudioParam) from openai.types.chat import ( ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam) from openai.types.chat import (ChatCompletionContentPartRefusalParam, @@ -105,6 +106,7 @@ class CustomChatCompletionContentSimpleVideoParam(TypedDict, total=False): ChatCompletionContentPartParam: TypeAlias = Union[ OpenAIChatCompletionContentPartParam, ChatCompletionContentPartAudioParam, + ChatCompletionContentPartInputAudioParam, ChatCompletionContentPartVideoParam, ChatCompletionContentPartRefusalParam, CustomChatCompletionContentSimpleImageParam, CustomChatCompletionContentSimpleAudioParam, @@ -519,6 +521,10 @@ def parse_image(self, image_url: str) -> None: def parse_audio(self, audio_url: str) -> None: raise NotImplementedError + @abstractmethod + def parse_input_audio(self, input_audio: Dict[str, str]) -> None: + raise NotImplementedError + @abstractmethod def parse_video(self, video_url: str) -> None: raise NotImplementedError @@ -545,6 +551,15 @@ def parse_audio(self, audio_url: str) -> None: placeholder = self._tracker.add("audio", audio) self._add_placeholder(placeholder) + def parse_input_audio(self, input_audio: Dict[str, str]) -> None: + input_audio_data = input_audio.get("data","") + input_audio_format = input_audio.get("format","") + audio_url = f"data:audio/{input_audio_format};base64,{input_audio_data}" + audio = get_and_parse_audio(audio_url) + + placeholder = self._tracker.add("audio", audio) + self._add_placeholder(placeholder) + def parse_video(self, video_url: str) -> None: video = 
get_and_parse_video(video_url) @@ -574,6 +589,15 @@ def parse_audio(self, audio_url: str) -> None: placeholder = self._tracker.add("audio", audio_coro) self._add_placeholder(placeholder) + def parse_input_audio(self, input_audio: Dict[str, str]) -> None: + input_audio_data = input_audio.get("data","") + input_audio_format = input_audio.get("format","") + audio_url = f"data:audio/{input_audio_format};base64,{input_audio_data}" + audio_coro = async_get_and_parse_audio(audio_url) + + placeholder = self._tracker.add("audio", audio_coro) + self._add_placeholder(placeholder) + def parse_video(self, video_url: str) -> None: video = async_get_and_parse_video(video_url) @@ -667,17 +691,22 @@ def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int], _TextParser = partial(cast, ChatCompletionContentPartTextParam) _ImageParser = partial(cast, ChatCompletionContentPartImageParam) _AudioParser = partial(cast, ChatCompletionContentPartAudioParam) +_InputAudioParser = partial(cast, ChatCompletionContentPartInputAudioParam) _RefusalParser = partial(cast, ChatCompletionContentPartRefusalParam) _VideoParser = partial(cast, ChatCompletionContentPartVideoParam) # Define a mapping from part types to their corresponding parsing functions. -MM_PARSER_MAP: Dict[str, Callable[[ChatCompletionContentPartParam], str]] = { +MM_PARSER_MAP: Dict[str, + Callable[[ChatCompletionContentPartParam], + Union[str, Dict[str,str]]]] = { "text": lambda part: _TextParser(part).get("text", ""), "image_url": lambda part: _ImageParser(part).get("image_url", {}).get("url", ""), "audio_url": lambda part: _AudioParser(part).get("audio_url", {}).get("url", ""), + "input_audio": + lambda part: _InputAudioParser(part).get("input_audio", {}), "refusal": lambda part: _RefusalParser(part).get("refusal", ""), "video_url": @@ -686,7 +715,8 @@ def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int], def _parse_chat_message_content_mm_part( - part: ChatCompletionContentPartParam) -> Tuple[str, str]: + part: ChatCompletionContentPartParam) -> Tuple[str, + Union[str, Dict[str, str]]]: """ Parses a given multi-modal content part based on its type. @@ -717,6 +747,7 @@ def _parse_chat_message_content_mm_part( return part_type, content # Handle missing 'type' but provided direct URL fields. 
+ # 'type' is required field by pydantic if part_type is None: if part.get("image_url") is not None: image_params = cast(CustomChatCompletionContentSimpleImageParam, @@ -726,6 +757,9 @@ def _parse_chat_message_content_mm_part( audio_params = cast(CustomChatCompletionContentSimpleAudioParam, part) return "audio_url", audio_params.get("audio_url", "") + if part.get("input_audio") is not None: + input_audio_params = cast(Dict[str, str], part) + return "input_audio", input_audio_params if part.get("video_url") is not None: video_params = cast(CustomChatCompletionContentSimpleVideoParam, part) @@ -739,7 +773,7 @@ def _parse_chat_message_content_mm_part( VALID_MESSAGE_CONTENT_MM_PART_TYPES = ("text", "refusal", "image_url", - "audio_url", "video_url") + "audio_url", "input_audio", "video_url") def _parse_chat_message_content_parts( @@ -795,7 +829,7 @@ def _parse_chat_message_content_part( # Handle structured dictionary parts part_type, content = _parse_chat_message_content_mm_part(part) - # if part_type is text/refusal/image_url/audio_url/video_url but + # if part_type is text/refusal/image_url/audio_url/video_url/input_audio but # content is empty, log a warning and skip if part_type in VALID_MESSAGE_CONTENT_MM_PART_TYPES and not content: logger.warning( @@ -804,18 +838,30 @@ def _parse_chat_message_content_part( return None if part_type in ("text", "refusal"): - return {'type': 'text', 'text': content} if wrap_dicts else content + str_content = cast(str, content) + if wrap_dicts: + return {'type': 'text', 'text': str_content} + else: + return str_content if part_type == "image_url": - mm_parser.parse_image(content) + str_content = cast(str, content) + mm_parser.parse_image(str_content) return {'type': 'image'} if wrap_dicts else None if part_type == "audio_url": - mm_parser.parse_audio(content) + str_content = cast(str, content) + mm_parser.parse_audio(str_content) + return {'type': 'audio'} if wrap_dicts else None + + if part_type == "input_audio": + dict_content = cast(Dict[str, str], content) + mm_parser.parse_input_audio(dict_content) return {'type': 'audio'} if wrap_dicts else None if part_type == "video_url": - mm_parser.parse_video(content) + str_content = cast(str, content) + mm_parser.parse_video(str_content) return {'type': 'video'} if wrap_dicts else None raise NotImplementedError(f"Unknown part type: {part_type}") @@ -840,7 +886,6 @@ def _parse_chat_message_content( content = [ ChatCompletionContentPartTextParam(type="text", text=content) ] - result = _parse_chat_message_content_parts( role, content, # type: ignore From 59c9b6ebeba79b2d744eec86734a7e13b03dcab7 Mon Sep 17 00:00:00 2001 From: Roger Wang <136131678+ywang96@users.noreply.github.com> Date: Mon, 16 Dec 2024 22:10:57 -0800 Subject: [PATCH 23/72] [V1][VLM] Proper memory profiling for image language models (#11210) Signed-off-by: Roger Wang Co-authored-by: ywang96 --- vllm/config.py | 8 ++++ vllm/model_executor/models/pixtral.py | 5 ++ vllm/multimodal/registry.py | 23 +++++++-- vllm/v1/core/scheduler.py | 7 ++- vllm/v1/engine/mm_input_mapper.py | 1 + vllm/v1/worker/gpu_model_runner.py | 67 ++++++++++++++++++++++++--- 6 files changed, 98 insertions(+), 13 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 9cfd08024ea7b..9ecd3e72afa9f 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1280,6 +1280,14 @@ class SchedulerConfig: is_multimodal_model: bool = False + # FIXME(woosuk & ywang96): Below are placeholder values. We need to + # calculate the actual values from the configurations. 
+ # Multimodal encoder run compute budget, only used in V1 + max_num_encoder_input_tokens = 16384 + + # Multimodal encoder cache size, only used in V1 + encoder_cache_size = 16384 + # Whether to perform preemption by swapping or # recomputation. If not specified, we determine the mode as follows: # We use recomputation by default since it incurs lower overhead than diff --git a/vllm/model_executor/models/pixtral.py b/vllm/model_executor/models/pixtral.py index 161d6b41bfa5f..f05ea195e043d 100644 --- a/vllm/model_executor/models/pixtral.py +++ b/vllm/model_executor/models/pixtral.py @@ -245,6 +245,11 @@ def get_multimodal_embeddings(self, **kwargs) -> Optional[NestedTensors]: # Do not split, return as tensor of shape [1, fs, hs] return image_embeds.unsqueeze(0) + # If the last split index is the last index in image_tokens, we + # ignore it to avoid empty split tensor + if split_indices[-1] == len(image_tokens): + split_indices = split_indices[:-1] + image_embeds = image_embeds.tensor_split(split_indices.cpu()) return image_embeds diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py index 03f8814a95356..6cd79d414c978 100644 --- a/vllm/multimodal/registry.py +++ b/vllm/multimodal/registry.py @@ -200,6 +200,23 @@ def register_max_image_tokens( """ return self.register_max_multimodal_tokens("image", max_mm_tokens) + def get_max_tokens_per_item_by_modality( + self, + model_config: "ModelConfig", + ) -> Mapping[str, int]: + """ + Get the maximum number of tokens per data item from each modality + for profiling the memory usage of a model. + + Note: + This is currently directly used only in V1. + """ + + return { + key: plugin.get_max_multimodal_tokens(model_config) + for key, plugin in self._plugins.items() + } + def get_max_tokens_by_modality( self, model_config: "ModelConfig", @@ -216,9 +233,9 @@ def get_max_tokens_by_modality( limits_per_plugin = self._limits_by_model[model_config] return { - key: (limits_per_plugin[key] * - plugin.get_max_multimodal_tokens(model_config)) - for key, plugin in self._plugins.items() + key: limits_per_plugin[key] * max_tokens_per_mm_item + for key, max_tokens_per_mm_item in + self.get_max_tokens_per_item_by_modality(model_config).items() } def get_max_multimodal_tokens(self, model_config: "ModelConfig") -> int: diff --git a/vllm/v1/core/scheduler.py b/vllm/v1/core/scheduler.py index f76364f64033d..178532e477dae 100644 --- a/vllm/v1/core/scheduler.py +++ b/vllm/v1/core/scheduler.py @@ -73,14 +73,13 @@ def __init__( # NOTE(woosuk): Here, "encoder" includes the vision encoder (and # projector if needed). Currently, we assume that the encoder also # has the Transformer architecture (e.g., ViT). - # FIXME(woosuk): Below are placeholder values. We need to calculate the - # actual values from the configurations. - self.max_num_encoder_input_tokens = 16384 + self.max_num_encoder_input_tokens = self.scheduler_config.max_num_encoder_input_tokens #noqa: E501 # NOTE(woosuk): For the models without encoder (e.g., text-only models), # the encoder cache will not be initialized and used, regardless of # the cache size. This is because the memory space for the encoder cache # is preallocated in the profiling run. 
- self.encoder_cache_manager = EncoderCacheManager(cache_size=16384) + self.encoder_cache_manager = EncoderCacheManager( + cache_size=self.scheduler_config.encoder_cache_size) def schedule(self) -> "SchedulerOutput": # NOTE(woosuk) on the scheduling algorithm: diff --git a/vllm/v1/engine/mm_input_mapper.py b/vllm/v1/engine/mm_input_mapper.py index cca27c2218af7..6cdeba6f3f71e 100644 --- a/vllm/v1/engine/mm_input_mapper.py +++ b/vllm/v1/engine/mm_input_mapper.py @@ -54,6 +54,7 @@ def cache_hit_ratio(self, steps): logger.debug("MMInputMapper: cache_hit_ratio = %.2f ", self.mm_cache_hits / self.mm_cache_total) + # TODO: Support modalities beyond image. def process_inputs( self, mm_data: MultiModalDataDict, diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 67166fb05085c..c6fab5f05fcb3 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -10,15 +10,16 @@ from vllm.config import CompilationLevel, VllmConfig from vllm.distributed.parallel_state import graph_capture from vllm.forward_context import set_forward_context -from vllm.inputs import INPUT_REGISTRY, InputRegistry +from vllm.inputs import INPUT_REGISTRY from vllm.logger import init_logger from vllm.model_executor.model_loader import get_model -from vllm.multimodal import MultiModalKwargs +from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargs from vllm.sampling_params import SamplingType from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler, LayerBlockType, cdiv, is_pin_memory_available) from vllm.v1.attention.backends.flash_attn import (FlashAttentionBackend, FlashAttentionMetadata) +from vllm.v1.engine.mm_input_mapper import MMInputMapperClient from vllm.v1.outputs import ModelRunnerOutput from vllm.v1.sample.metadata import SamplingMetadata from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch @@ -35,7 +36,6 @@ def __init__( self, vllm_config: VllmConfig, device: torch.device, - input_registry: InputRegistry = INPUT_REGISTRY, ): self.vllm_config = vllm_config self.model_config = vllm_config.model_config @@ -77,7 +77,12 @@ def __init__( self.hidden_size = model_config.get_hidden_size() # Multi-modal data support - self.input_registry = input_registry + self.input_registry = INPUT_REGISTRY + self.mm_registry = MULTIMODAL_REGISTRY + # NOTE: mm_input_mapper is only used for memory profiling. + self.mm_input_mapper = MMInputMapperClient(self.model_config) + self.max_num_encoder_input_tokens = self.scheduler_config.max_num_encoder_input_tokens # noqa: E501 + self.encoder_cache_size = self.scheduler_config.encoder_cache_size # Lazy initialization # self.model: nn.Module # Set after load_model @@ -599,8 +604,6 @@ def _dummy_run( return hidden_states def profile_run(self) -> None: - # TODO(woosuk): Profile the max memory usage of the encoder and - # the encoder cache. # use an empty tensor instead of `None`` to force Dynamo to pass # it by reference, rather by specializing on the value `None`. # the `dtype` argument does not matter, and we use `float32` as @@ -612,6 +615,57 @@ def profile_run(self) -> None: torch.tensor([], dtype=torch.float32, device=self.device) for _ in range(self.num_attn_layers) ] + + # Profile with multimodal encoder & encoder cache. + # TODO (ywang96): generalize this beyond image modality since + # mm_input_mapper only supports image inputs. + if self.is_multimodal_model: + + # Create dummy batch of multimodal inputs. 
+ dummy_request_data = self.input_registry.dummy_data_for_profiling( + model_config=self.model_config, + seq_len=self.max_num_tokens, + mm_registry=self.mm_registry, + ) + dummy_mm_data = dummy_request_data.multi_modal_data + dummy_mm_kwargs, _ = self.mm_input_mapper.process_inputs( + mm_data=dummy_mm_data, + mm_hashes=None, + mm_processor_kwargs=None, + precomputed_mm_inputs=None) + + # NOTE: Currently model is profiled with a single non-text + # modality even when it supports multiple. + max_tokens_per_mm_item = max( + self.mm_registry.get_max_tokens_per_item_by_modality( + self.model_config).values()) + + max_num_mm_items = min( + self.max_num_encoder_input_tokens, + self.encoder_cache_size) // max_tokens_per_mm_item + + # Dummy data definition in V0 may contain multiple multimodal items + # (e.g, multiple images) for a single request, therefore here we + # always replicate first item by max_num_mm_items times since in V1 + # they are scheduled to be processed separately. + batched_dummy_mm_inputs = MultiModalKwargs.batch( + [dummy_mm_kwargs[0]] * max_num_mm_items) + batched_dummy_mm_inputs = MultiModalKwargs.as_kwargs( + batched_dummy_mm_inputs, device=self.device) + + # Run multimodal encoder. + dummy_encoder_outputs = self.model.get_multimodal_embeddings( + **batched_dummy_mm_inputs) + assert len(dummy_encoder_outputs) == max_num_mm_items, ( + "Expected dimension 0 of encoder outputs to match the number " + f"of multimodal data items: {max_num_mm_items}, got " + f"{len(dummy_encoder_outputs)=} instead. This is most likely " + "due to the 'get_multimodal_embeddings' method of the model " + "not implemented correctly.") + + # Cache the dummy encoder outputs. + self.encoder_cache["tmp"] = dict(enumerate(dummy_encoder_outputs)) + # Trigger compilation for general shape. hidden_states = self._dummy_run(self.model, self.max_num_tokens, dummy_kv_caches) @@ -620,6 +674,7 @@ def profile_run(self) -> None: # TODO(woosuk): Consider the memory usage of the sampler. 
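# --- Illustrative sketch (not part of the patch above) ---------------------
# profile_run sizes the dummy multimodal batch as
#     min(encoder compute budget, encoder cache size) // max tokens per item,
# i.e. whichever limit is tighter bounds how many items are profiled.
# The helper and the 2048-token-per-image figure below are hypothetical.
def _max_dummy_mm_items(max_num_encoder_input_tokens: int,
                        encoder_cache_size: int,
                        max_tokens_per_mm_item: int) -> int:
    return min(max_num_encoder_input_tokens,
               encoder_cache_size) // max_tokens_per_mm_item

# e.g. a 16384-token budget with a hypothetical 2048-token-per-image model
# profiles 8 replicated dummy images.
assert _max_dummy_mm_items(16384, 16384, 2048) == 8
# ---------------------------------------------------------------------------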
torch.cuda.synchronize() del hidden_states, logits + self.encoder_cache.clear() gc.collect() def capture_model(self) -> None: From e88db68cf5712956f36e77c288699592327b15bd Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Tue, 17 Dec 2024 14:11:06 +0800 Subject: [PATCH 24/72] [Platform] platform agnostic for EngineArgs initialization (#11225) Signed-off-by: wangxiyuan --- vllm/engine/arg_utils.py | 8 ++------ vllm/platforms/cpu.py | 3 +++ vllm/platforms/cuda.py | 4 ++++ vllm/platforms/hpu.py | 6 ++++++ vllm/platforms/neuron.py | 6 ++++++ vllm/platforms/openvino.py | 3 +++ vllm/platforms/rocm.py | 4 ++++ vllm/platforms/tpu.py | 5 +++++ vllm/platforms/xpu.py | 4 ++++ 9 files changed, 37 insertions(+), 6 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 06b8542779dc0..f6d276fe7c0c8 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -112,9 +112,7 @@ class EngineArgs: pipeline_parallel_size: int = 1 tensor_parallel_size: int = 1 max_parallel_loading_workers: Optional[int] = None - # NOTE(kzawora): default block size for Gaudi should be 128 - # smaller sizes still work, but very inefficiently - block_size: int = 16 if not current_platform.is_hpu() else 128 + block_size: Optional[int] = None enable_prefix_caching: Optional[bool] = None disable_sliding_window: bool = False use_v2_block_manager: bool = True @@ -1036,9 +1034,7 @@ def create_engine_config(self, self.enable_prefix_caching = False cache_config = CacheConfig( - # neuron needs block_size = max_model_len - block_size=self.block_size if self.device != "neuron" else - (self.max_model_len if self.max_model_len is not None else 0), + block_size=self.block_size, gpu_memory_utilization=self.gpu_memory_utilization, swap_space=self.swap_space, cache_dtype=self.kv_cache_dtype, diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index aad8755d9fcd8..d95a2b4cd5565 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -60,6 +60,9 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: cache_config = vllm_config.cache_config + if cache_config and cache_config.block_size is None: + cache_config.block_size = 16 + kv_cache_space = envs.VLLM_CPU_KVCACHE_SPACE if kv_cache_space >= 0: diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index ae1fd6d5ce068..3c5350b778345 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -137,6 +137,10 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: else: parallel_config.worker_cls = "vllm.worker.worker.Worker" + cache_config = vllm_config.cache_config + if cache_config and cache_config.block_size is None: + cache_config.block_size = 16 + # NVML utils # Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`, diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py index 2b947d280f9f8..0a44f2b74163a 100644 --- a/vllm/platforms/hpu.py +++ b/vllm/platforms/hpu.py @@ -48,6 +48,12 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: if parallel_config.worker_cls == "auto": parallel_config.worker_cls = "vllm.worker.hpu_worker.HPUWorker" + # NOTE(kzawora): default block size for Gaudi should be 128 + # smaller sizes still work, but very inefficiently + cache_config = vllm_config.cache_config + if cache_config and cache_config.block_size is None: + cache_config.block_size = 128 + @classmethod def is_pin_memory_available(cls): logger.warning("Pin memory is not supported on HPU.") diff --git a/vllm/platforms/neuron.py b/vllm/platforms/neuron.py index 86113523385f6..a4bbbd27c8a89 
100644 --- a/vllm/platforms/neuron.py +++ b/vllm/platforms/neuron.py @@ -33,6 +33,12 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: parallel_config.worker_cls = \ "vllm.worker.neuron_worker.NeuronWorker" + cache_config = vllm_config.cache_config + if cache_config: + # neuron needs block_size = max_model_len + vllm_config.cache_config.block_size = \ + vllm_config.model_config.max_model_len + @classmethod def is_pin_memory_available(cls) -> bool: logger.warning("Pin memory is not supported on Neuron.") diff --git a/vllm/platforms/openvino.py b/vllm/platforms/openvino.py index ccd94e8adb3b1..16eb8dc81efc2 100644 --- a/vllm/platforms/openvino.py +++ b/vllm/platforms/openvino.py @@ -87,6 +87,9 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: # check and update cache config ov_core = ov.Core() cache_config = vllm_config.cache_config + if cache_config and cache_config.block_size is None: + cache_config.block_size = 16 + if envs.VLLM_OPENVINO_CPU_KV_CACHE_PRECISION == "u8": if not OpenVinoPlatform.is_openvino_cpu(): logger.info("VLLM_OPENVINO_CPU_KV_CACHE_PRECISION is" diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 0133f26a0b1bc..7778b565372cb 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -84,6 +84,10 @@ def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool: @classmethod def check_and_update_config(cls, vllm_config: VllmConfig) -> None: + cache_config = vllm_config.cache_config + if cache_config and cache_config.block_size is None: + cache_config.block_size = 16 + parallel_config = vllm_config.parallel_config scheduler_config = vllm_config.scheduler_config if parallel_config.worker_cls == "auto": diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index 10d874349f36b..77f5c8401424b 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -46,6 +46,11 @@ def inference_mode(cls): @classmethod def check_and_update_config(cls, vllm_config: VllmConfig) -> None: from vllm.config import CompilationLevel + + cache_config = vllm_config.cache_config + if cache_config and cache_config.block_size is None: + cache_config.block_size = 16 + compilation_config = vllm_config.compilation_config if compilation_config.level == CompilationLevel.NO_COMPILATION: # TPU does not support NO_COMPILATION diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index c20190e789d7e..78e17c2afec65 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -51,6 +51,10 @@ def inference_mode(): @classmethod def check_and_update_config(cls, vllm_config: VllmConfig) -> None: + cache_config = vllm_config.cache_config + if cache_config and cache_config.block_size is None: + cache_config.block_size = 16 + # check and update model config model_config = vllm_config.model_config if model_config.dtype == torch.bfloat16: From 2bfdbf2a36256bb08547cea3d4ef83b5d27c4b04 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Tue, 17 Dec 2024 01:11:33 -0500 Subject: [PATCH 25/72] [V1][Core] Use weakref.finalize instead of atexit (#11242) Signed-off-by: Tyler Michael Smith --- vllm/v1/engine/core_client.py | 13 ++----------- vllm/v1/executor/multiproc_executor.py | 10 +++------- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index ff25a9b2e9cac..d56fcbdb1e7c4 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -1,5 +1,5 @@ -import atexit import os +import weakref from typing import List, Optional import 
msgspec @@ -165,15 +165,9 @@ def __init__( ready_path=ready_path, # type: ignore[misc] **kwargs, ) - atexit.register(self.shutdown) + self._finalizer = weakref.finalize(self, self.shutdown) def shutdown(self): - # During final garbage collection in process shutdown, atexit may be - # None. - if atexit: - # in case shutdown gets called via __del__ first - atexit.unregister(self.shutdown) - # Shut down the zmq context. self.ctx.destroy(linger=0) @@ -197,9 +191,6 @@ def shutdown(self): os.remove(socket_file) self.proc_handle = None - def __del__(self): - self.shutdown() - class SyncMPClient(MPClient): """Synchronous client for multi-proc EngineCore.""" diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py index 17441dacdc5cf..128101aa6956d 100644 --- a/vllm/v1/executor/multiproc_executor.py +++ b/vllm/v1/executor/multiproc_executor.py @@ -1,9 +1,9 @@ -import atexit import os import pickle import signal import sys import time +import weakref from dataclasses import dataclass from enum import Enum, auto from multiprocessing.process import BaseProcess @@ -37,7 +37,7 @@ class MultiprocExecutor(Executor): def __init__(self, vllm_config: VllmConfig) -> None: # Call self.shutdown at exit to clean up # and ensure workers will be terminated. - atexit.register(self.shutdown) + self._finalizer = weakref.finalize(self, self.shutdown) self.vllm_config = vllm_config self.parallel_config = vllm_config.parallel_config @@ -195,14 +195,10 @@ def _cleanup_sockets(self): os.remove(socket_path) def shutdown(self): - if atexit: - # in case shutdown was called explicitly, we don't need to call it - # again - atexit.unregister(self.shutdown) """Properly shut down the executor and its workers""" if getattr(self, 'shutting_down', False): self.shutting_down = True - for w in self.workers: #TODO: not sure if needed + for w in self.workers: w.worker_response_mq = None self._ensure_worker_termination() From 02222a0256f60319f5bcd56d1d036a943d6334f8 Mon Sep 17 00:00:00 2001 From: Roger Wang <136131678+ywang96@users.noreply.github.com> Date: Mon, 16 Dec 2024 22:57:02 -0800 Subject: [PATCH 26/72] [Misc] Kernel Benchmark for `RMSNorm` (#11241) Signed-off-by: Roger Wang Co-authored-by: Xiaoyu Zhang --- benchmarks/kernels/benchmark_rmsnorm.py | 262 ++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 benchmarks/kernels/benchmark_rmsnorm.py diff --git a/benchmarks/kernels/benchmark_rmsnorm.py b/benchmarks/kernels/benchmark_rmsnorm.py new file mode 100644 index 0000000000000..baa5de0fff1bd --- /dev/null +++ b/benchmarks/kernels/benchmark_rmsnorm.py @@ -0,0 +1,262 @@ +import itertools +from typing import Optional, Tuple, Union + +import torch +import triton +from flashinfer.norm import fused_add_rmsnorm, rmsnorm +from torch import nn + +from vllm import _custom_ops as vllm_ops + + +class HuggingFaceRMSNorm(nn.Module): + + def __init__(self, hidden_size: int, eps: float = 1e-6) -> None: + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward( + self, + x: torch.Tensor, + residual: Optional[torch.Tensor] = None, + ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + orig_dtype = x.dtype + x = x.to(torch.float32) + if residual is not None: + x = x + residual.to(torch.float32) + residual = x.to(orig_dtype) + + variance = x.pow(2).mean(dim=-1, keepdim=True) + x = x * torch.rsqrt(variance + self.variance_epsilon) + x = x.to(orig_dtype) * self.weight + if residual is None: + return x + else: + return 
x, residual + + +def rmsnorm_naive( + x: torch.Tensor, + weight: torch.Tensor, + residual: Optional[torch.Tensor] = None, + eps: float = 1e-6, +): + naive_norm = HuggingFaceRMSNorm(x.shape[-1], eps=eps) + naive_norm.weight = nn.Parameter(weight) + naive_norm = naive_norm.to(x.device) + + orig_shape = x.shape + x = x.view(-1, x.shape[-1]) + if residual is not None: + residual = residual.view(-1, residual.shape[-1]) + + output = naive_norm(x, residual) + + if isinstance(output, tuple): + output = (output[0].view(orig_shape), output[1].view(orig_shape)) + else: + output = output.view(orig_shape) + return output + + +def rmsnorm_flashinfer( + x: torch.Tensor, + weight: torch.Tensor, + residual: Optional[torch.Tensor] = None, + eps: float = 1e-6, +): + orig_shape = x.shape + x = x.view(-1, x.shape[-1]) + if residual is not None: + residual = residual.view(-1, residual.shape[-1]) + + if residual is not None: + fused_add_rmsnorm(x, residual, weight, eps) + output = (x, residual) + else: + output = rmsnorm(x, weight, eps) + + if isinstance(output, tuple): + output = (output[0].view(orig_shape), output[1].view(orig_shape)) + else: + output = output.view(orig_shape) + return output + + +def rmsnorm_vllm( + x: torch.Tensor, + weight: torch.Tensor, + residual: Optional[torch.Tensor] = None, + eps: float = 1e-6, +): + orig_shape = x.shape + x = x.view(-1, x.shape[-1]) + if residual is not None: + residual = residual.view(-1, residual.shape[-1]) + + if residual is not None: + vllm_ops.fused_add_rms_norm(x, residual, weight, eps) + output = (x, residual) + else: + out = torch.empty_like(x) + vllm_ops.rms_norm(out, x, weight, eps) + output = out + + if isinstance(output, tuple): + output = (output[0].view(orig_shape), output[1].view(orig_shape)) + else: + output = output.view(orig_shape) + return output + + +def calculate_diff(batch_size, seq_len, hidden_size, use_residual=True): + dtype = torch.bfloat16 + x = torch.randn(batch_size, + seq_len, + hidden_size, + dtype=dtype, + device="cuda") + weight = torch.ones(hidden_size, dtype=dtype, device="cuda") + residual = torch.randn_like(x) if use_residual else None + + output_naive = rmsnorm_naive( + x.clone(), weight, + residual.clone() if residual is not None else None) + output_flashinfer = rmsnorm_flashinfer( + x.clone(), weight, + residual.clone() if residual is not None else None) + output_vllm = rmsnorm_vllm( + x.clone(), weight, + residual.clone() if residual is not None else None) + + if use_residual: + output_naive = output_naive[0] + output_flashinfer = output_flashinfer[0] + output_vllm = output_vllm[0] + + print(f"Naive output={output_naive}") + print(f"FlashInfer output={output_flashinfer}") + print(f"VLLM output={output_vllm}") + + if torch.allclose(output_naive, output_flashinfer, atol=1e-2, + rtol=1e-2) and torch.allclose( + output_naive, output_vllm, atol=1e-2, rtol=1e-2): + print("✅ All implementations match") + else: + print("❌ Implementations differ") + + +batch_size_range = [2**i for i in range(0, 7, 2)] +seq_length_range = [2**i for i in range(6, 11, 1)] +head_num_range = [32, 48] +configs = list( + itertools.product(head_num_range, batch_size_range, seq_length_range)) + + +def get_benchmark(use_residual): + + @triton.testing.perf_report( + triton.testing.Benchmark( + x_names=["head_num", "batch_size", "seq_len"], + x_vals=[list(_) for _ in configs], + line_arg="provider", + line_vals=["huggingface", "flashinfer", "vllm"], + line_names=["HuggingFace", "FlashInfer", "vLLM"], + styles=[("blue", "-"), ("green", "-"), ("red", "-")], + 
ylabel="us", + plot_name= + f"rmsnorm-perf-{'with' if use_residual else 'without'}-residual", + args={}, + )) + def benchmark(head_num, batch_size, seq_len, provider): + dtype = torch.bfloat16 + hidden_size = head_num * 128 # assuming head_dim = 128 + + x = torch.randn(batch_size, + seq_len, + hidden_size, + dtype=dtype, + device="cuda") + weight = torch.ones(hidden_size, dtype=dtype, device="cuda") + residual = torch.randn_like(x) if use_residual else None + + quantiles = [0.5, 0.2, 0.8] + + if provider == "huggingface": + ms, min_ms, max_ms = triton.testing.do_bench( + lambda: rmsnorm_naive( + x.clone(), + weight, + residual.clone() if residual is not None else None, + ), + quantiles=quantiles, + ) + elif provider == "flashinfer": + ms, min_ms, max_ms = triton.testing.do_bench( + lambda: rmsnorm_flashinfer( + x.clone(), + weight, + residual.clone() if residual is not None else None, + ), + quantiles=quantiles, + ) + else: + ms, min_ms, max_ms = triton.testing.do_bench( + lambda: rmsnorm_vllm( + x.clone(), + weight, + residual.clone() if residual is not None else None, + ), + quantiles=quantiles, + ) + + return 1000 * ms, 1000 * max_ms, 1000 * min_ms + + return benchmark + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--batch-size", + type=int, + default=4, + help="Batch size", + ) + parser.add_argument( + "--seq-len", + type=int, + default=128, + help="Sequence length", + ) + parser.add_argument( + "--hidden-size", + type=int, + default=4096, + help="Hidden size (2nd dimension) of the sequence", + ) + parser.add_argument("--use-residual", + action="store_true", + help="Whether to use residual connection") + parser.add_argument( + "--save-path", + type=str, + default="./configs/rmsnorm/", + help="Path to save rmsnorm benchmark results", + ) + + args = parser.parse_args() + + # Run correctness test + calculate_diff(batch_size=args.batch_size, + seq_len=args.seq_len, + hidden_size=args.hidden_size, + use_residual=args.use_residual) + + # Get the benchmark function with proper use_residual setting + benchmark = get_benchmark(args.use_residual) + # Run performance benchmark + benchmark.run(print_data=True, save_path=args.save_path) From f9ecbb18bf03338a4272c933a49a87021363b048 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Tue, 17 Dec 2024 16:37:04 +0800 Subject: [PATCH 27/72] [Misc] Allow passing logits_soft_cap for xformers backend (#11252) Signed-off-by: Isotr0py <2037008807@qq.com> --- vllm/attention/backends/xformers.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/vllm/attention/backends/xformers.py b/vllm/attention/backends/xformers.py index e2e989efb020c..3e59b3603d2c6 100644 --- a/vllm/attention/backends/xformers.py +++ b/vllm/attention/backends/xformers.py @@ -17,9 +17,7 @@ is_all_cross_attn_metadata_set, is_all_encoder_attn_metadata_set) from vllm.attention.ops.paged_attn import (PagedAttention, PagedAttentionMetadata) -from vllm.logger import init_logger - -logger = init_logger(__name__) +from vllm.utils import print_warning_once class XFormersBackend(AttentionBackend): @@ -386,8 +384,8 @@ def __init__( raise ValueError( "XFormers does not support block-sparse attention.") if logits_soft_cap is not None: - raise ValueError( - "XFormers does not support attention logits soft capping.") + print_warning_once("XFormers does not support logits soft cap. 
" + "Outputs may be slightly off.") self.num_heads = num_heads self.head_size = head_size self.scale = float(scale) From 2d1b9baa8f57fc59912c7bcd07fd630fb9d72c9d Mon Sep 17 00:00:00 2001 From: Joe Runde Date: Tue, 17 Dec 2024 13:26:32 -0700 Subject: [PATCH 28/72] [Bugfix] Fix request cancellation without polling (#11190) --- tests/entrypoints/openai/test_basic.py | 51 ++++++++++++++++ tests/test_utils.py | 6 +- tests/utils.py | 11 ++-- vllm/engine/async_llm_engine.py | 46 +++++++++------ vllm/entrypoints/api_server.py | 11 ++-- vllm/entrypoints/openai/api_server.py | 8 +++ vllm/entrypoints/openai/serving_chat.py | 5 -- vllm/entrypoints/openai/serving_completion.py | 3 +- vllm/entrypoints/openai/serving_embedding.py | 5 +- vllm/entrypoints/openai/serving_score.py | 5 +- vllm/entrypoints/utils.py | 57 ++++++++++++++++++ vllm/utils.py | 59 ++----------------- 12 files changed, 164 insertions(+), 103 deletions(-) create mode 100644 vllm/entrypoints/utils.py diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/openai/test_basic.py index 4616f363cc04a..547c1fd020928 100644 --- a/tests/entrypoints/openai/test_basic.py +++ b/tests/entrypoints/openai/test_basic.py @@ -1,6 +1,8 @@ +import asyncio from http import HTTPStatus from typing import List +import openai import pytest import pytest_asyncio import requests @@ -103,3 +105,52 @@ async def test_check_health(server: RemoteOpenAIServer): response = requests.get(server.url_for("health")) assert response.status_code == HTTPStatus.OK + + +@pytest.mark.parametrize( + "server_args", + [ + pytest.param(["--max-model-len", "10100"], + id="default-frontend-multiprocessing"), + pytest.param( + ["--disable-frontend-multiprocessing", "--max-model-len", "10100"], + id="disable-frontend-multiprocessing") + ], + indirect=True, +) +@pytest.mark.asyncio +async def test_request_cancellation(server: RemoteOpenAIServer): + # clunky test: send an ungodly amount of load in with short timeouts + # then ensure that it still responds quickly afterwards + + chat_input = [{"role": "user", "content": "Write a long story"}] + client = server.get_async_client(timeout=0.5) + tasks = [] + # Request about 2 million tokens + for _ in range(200): + task = asyncio.create_task( + client.chat.completions.create(messages=chat_input, + model=MODEL_NAME, + max_tokens=10000, + extra_body={"min_tokens": 10000})) + tasks.append(task) + + done, pending = await asyncio.wait(tasks, + return_when=asyncio.ALL_COMPLETED) + + # Make sure all requests were sent to the server and timed out + # (We don't want to hide other errors like 400s that would invalidate this + # test) + assert len(pending) == 0 + for d in done: + with pytest.raises(openai.APITimeoutError): + d.result() + + # If the server had not cancelled all the other requests, then it would not + # be able to respond to this one within the timeout + client = server.get_async_client(timeout=5) + response = await client.chat.completions.create(messages=chat_input, + model=MODEL_NAME, + max_tokens=10) + + assert len(response.choices) == 1 diff --git a/tests/test_utils.py b/tests/test_utils.py index 0bc9e5bc32a46..32a6b0aed66aa 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,6 @@ import asyncio import os import socket -from functools import partial from typing import AsyncIterator, Tuple import pytest @@ -26,10 +25,7 @@ async def mock_async_iterator(idx: int): print(f"iterator {idx} cancelled") iterators = [mock_async_iterator(i) for i in range(3)] - merged_iterator = merge_async_iterators(*iterators, 
- is_cancelled=partial(asyncio.sleep, - 0, - result=False)) + merged_iterator = merge_async_iterators(*iterators) async def stream_output(generator: AsyncIterator[Tuple[int, str]]): async for idx, output in generator: diff --git a/tests/utils.py b/tests/utils.py index afeb708f3bcdc..bf3d88194e4ca 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -163,12 +163,11 @@ def get_client(self): api_key=self.DUMMY_API_KEY, ) - def get_async_client(self): - return openai.AsyncOpenAI( - base_url=self.url_for("v1"), - api_key=self.DUMMY_API_KEY, - max_retries=0, - ) + def get_async_client(self, **kwargs): + return openai.AsyncOpenAI(base_url=self.url_for("v1"), + api_key=self.DUMMY_API_KEY, + max_retries=0, + **kwargs) def _test_completion( diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 32396fd10188d..f50e20cf70323 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -1065,16 +1065,20 @@ async def generate( >>> # Process and return the final output >>> ... """ - async for output in await self.add_request( - request_id, - prompt, - sampling_params, - lora_request=lora_request, - trace_headers=trace_headers, - prompt_adapter_request=prompt_adapter_request, - priority=priority, - ): - yield LLMEngine.validate_output(output, RequestOutput) + try: + async for output in await self.add_request( + request_id, + prompt, + sampling_params, + lora_request=lora_request, + trace_headers=trace_headers, + prompt_adapter_request=prompt_adapter_request, + priority=priority, + ): + yield LLMEngine.validate_output(output, RequestOutput) + except asyncio.CancelledError: + await self.abort(request_id) + raise async def encode( self, @@ -1147,15 +1151,19 @@ async def encode( >>> # Process and return the final output >>> ... """ - async for output in await self.add_request( - request_id, - prompt, - pooling_params, - lora_request=lora_request, - trace_headers=trace_headers, - priority=priority, - ): - yield LLMEngine.validate_output(output, PoolingRequestOutput) + try: + async for output in await self.add_request( + request_id, + prompt, + pooling_params, + lora_request=lora_request, + trace_headers=trace_headers, + priority=priority, + ): + yield LLMEngine.validate_output(output, PoolingRequestOutput) + except asyncio.CancelledError: + await self.abort(request_id) + raise async def abort(self, request_id: str) -> None: """Abort a request. diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index ea3c93f733038..95da1c6e7b9bf 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -17,11 +17,11 @@ from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.entrypoints.launcher import serve_http +from vllm.entrypoints.utils import with_cancellation from vllm.logger import init_logger from vllm.sampling_params import SamplingParams from vllm.usage.usage_lib import UsageContext -from vllm.utils import (FlexibleArgumentParser, iterate_with_cancellation, - random_uuid) +from vllm.utils import FlexibleArgumentParser, random_uuid from vllm.version import __version__ as VLLM_VERSION logger = init_logger("vllm.entrypoints.api_server") @@ -47,6 +47,11 @@ async def generate(request: Request) -> Response: - other fields: the sampling parameters (See `SamplingParams` for details). 
""" request_dict = await request.json() + return await _generate(request_dict, raw_request=request) + + +@with_cancellation +async def _generate(request_dict: dict, raw_request: Request) -> Response: prompt = request_dict.pop("prompt") stream = request_dict.pop("stream", False) sampling_params = SamplingParams(**request_dict) @@ -54,8 +59,6 @@ async def generate(request: Request) -> Response: assert engine is not None results_generator = engine.generate(prompt, sampling_params, request_id) - results_generator = iterate_with_cancellation( - results_generator, is_cancelled=request.is_disconnected) # Streaming case async def stream_results() -> AsyncGenerator[bytes, None]: diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 14e3a34ce141c..00e2d1a56f160 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -59,6 +59,7 @@ from vllm.entrypoints.openai.serving_tokenization import ( OpenAIServingTokenization) from vllm.entrypoints.openai.tool_parsers import ToolParserManager +from vllm.entrypoints.utils import with_cancellation from vllm.logger import init_logger from vllm.usage.usage_lib import UsageContext from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path, @@ -311,6 +312,7 @@ async def health(raw_request: Request) -> Response: @router.post("/tokenize") +@with_cancellation async def tokenize(request: TokenizeRequest, raw_request: Request): handler = tokenization(raw_request) @@ -325,6 +327,7 @@ async def tokenize(request: TokenizeRequest, raw_request: Request): @router.post("/detokenize") +@with_cancellation async def detokenize(request: DetokenizeRequest, raw_request: Request): handler = tokenization(raw_request) @@ -353,6 +356,7 @@ async def show_version(): @router.post("/v1/chat/completions") +@with_cancellation async def create_chat_completion(request: ChatCompletionRequest, raw_request: Request): handler = chat(raw_request) @@ -373,6 +377,7 @@ async def create_chat_completion(request: ChatCompletionRequest, @router.post("/v1/completions") +@with_cancellation async def create_completion(request: CompletionRequest, raw_request: Request): handler = completion(raw_request) if handler is None: @@ -390,6 +395,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request): @router.post("/v1/embeddings") +@with_cancellation async def create_embedding(request: EmbeddingRequest, raw_request: Request): handler = embedding(raw_request) if handler is None: @@ -407,6 +413,7 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request): @router.post("/score") +@with_cancellation async def create_score(request: ScoreRequest, raw_request: Request): handler = score(raw_request) if handler is None: @@ -424,6 +431,7 @@ async def create_score(request: ScoreRequest, raw_request: Request): @router.post("/v1/score") +@with_cancellation async def create_score_v1(request: ScoreRequest, raw_request: Request): logger.warning( "To indicate that Score API is not part of standard OpenAI API, we " diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 527418c635093..81bce0dd370bb 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -32,7 +32,6 @@ from vllm.sequence import Logprob from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer from vllm.transformers_utils.tokenizers import maybe_serialize_tool_calls -from vllm.utils import iterate_with_cancellation logger = 
init_logger(__name__) @@ -234,10 +233,6 @@ async def create_chat_completion( assert len(generators) == 1 result_generator, = generators - if raw_request: - result_generator = iterate_with_cancellation( - result_generator, raw_request.is_disconnected) - # Streaming response if request.stream: return self.chat_completion_stream_generator( diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index bd39a4c42e938..5cf9df92e296e 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -159,8 +159,7 @@ async def create_completion( # TODO: Use a vllm-specific Validation Error return self.create_error_response(str(e)) - result_generator = merge_async_iterators( - *generators, is_cancelled=raw_request.is_disconnected) + result_generator = merge_async_iterators(*generators) model_name = self._get_model_name(lora_request) num_prompts = len(engine_prompts) diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py index fd501ad4f833e..879276646d2ba 100644 --- a/vllm/entrypoints/openai/serving_embedding.py +++ b/vllm/entrypoints/openai/serving_embedding.py @@ -202,10 +202,7 @@ async def create_embedding( # TODO: Use a vllm-specific Validation Error return self.create_error_response(str(e)) - result_generator = merge_async_iterators( - *generators, - is_cancelled=raw_request.is_disconnected if raw_request else None, - ) + result_generator = merge_async_iterators(*generators) num_prompts = len(engine_prompts) diff --git a/vllm/entrypoints/openai/serving_score.py b/vllm/entrypoints/openai/serving_score.py index 6f5cc14ac37cc..101d170bee4d6 100644 --- a/vllm/entrypoints/openai/serving_score.py +++ b/vllm/entrypoints/openai/serving_score.py @@ -186,10 +186,7 @@ async def create_score( # TODO: Use a vllm-specific Validation Error return self.create_error_response(str(e)) - result_generator = merge_async_iterators( - *generators, - is_cancelled=raw_request.is_disconnected if raw_request else None, - ) + result_generator = merge_async_iterators(*generators) num_prompts = len(engine_prompts) diff --git a/vllm/entrypoints/utils.py b/vllm/entrypoints/utils.py new file mode 100644 index 0000000000000..e8a78d216d0f0 --- /dev/null +++ b/vllm/entrypoints/utils.py @@ -0,0 +1,57 @@ +import asyncio +import functools + +from fastapi import Request + + +async def listen_for_disconnect(request: Request) -> None: + """Returns if a disconnect message is received""" + while True: + message = await request.receive() + if message["type"] == "http.disconnect": + break + + +def with_cancellation(handler_func): + """Decorator that allows a route handler to be cancelled by client + disconnections. + + This does _not_ use request.is_disconnected, which does not work with + middleware. Instead this follows the pattern from + starlette.StreamingResponse, which simultaneously awaits on two tasks- one + to wait for an http disconnect message, and the other to do the work that we + want done. When the first task finishes, the other is cancelled. + + A core assumption of this method is that the body of the request has already + been read. This is a safe assumption to make for fastapi handlers that have + already parsed the body of the request into a pydantic model for us. + This decorator is unsafe to use elsewhere, as it will consume and throw away + all incoming messages for the request while it looks for a disconnect + message. 
+ + In the case where a `StreamingResponse` is returned by the handler, this + wrapper will stop listening for disconnects and instead the response object + will start listening for disconnects. + """ + + # Functools.wraps is required for this wrapper to appear to fastapi as a + # normal route handler, with the correct request type hinting. + @functools.wraps(handler_func) + async def wrapper(*args, **kwargs): + + # The request is either the second positional arg or `raw_request` + request = args[1] if len(args) > 1 else kwargs["raw_request"] + + handler_task = asyncio.create_task(handler_func(*args, **kwargs)) + cancellation_task = asyncio.create_task(listen_for_disconnect(request)) + + done, pending = await asyncio.wait([handler_task, cancellation_task], + return_when=asyncio.FIRST_COMPLETED) + for task in pending: + task.cancel() + + if handler_task in done: + return handler_task.result() + return None + + return wrapper diff --git a/vllm/utils.py b/vllm/utils.py index 73d2ae25f15ca..38c7dea6d2d3d 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -20,7 +20,7 @@ import uuid import warnings import weakref -from asyncio import FIRST_COMPLETED, AbstractEventLoop, Future, Task +from asyncio import FIRST_COMPLETED, AbstractEventLoop, Task from collections import UserDict, defaultdict from collections.abc import Iterable, Mapping from dataclasses import dataclass, field @@ -370,72 +370,23 @@ def _next_task(iterator: AsyncGenerator[T, None], return loop.create_task(iterator.__anext__()) # type: ignore[arg-type] -async def iterate_with_cancellation( - iterator: AsyncGenerator[T, None], - is_cancelled: Callable[[], Awaitable[bool]], -) -> AsyncGenerator[T, None]: - """Convert async iterator into one that polls the provided function - at least once per second to check for client cancellation. - """ - - loop = asyncio.get_running_loop() - - awaits: List[Future[T]] = [_next_task(iterator, loop)] - next_cancel_check: float = 0 - while True: - done, pending = await asyncio.wait(awaits, timeout=1.5) - - # Check for cancellation at most once per second - time_now = time.time() - if time_now >= next_cancel_check: - if await is_cancelled(): - with contextlib.suppress(BaseException): - awaits[0].cancel() - await iterator.aclose() - raise asyncio.CancelledError("client cancelled") - next_cancel_check = time_now + 1 - - if done: - try: - item = await awaits[0] - awaits[0] = _next_task(iterator, loop) - yield item - except StopAsyncIteration: - # we are done - return - - async def merge_async_iterators( - *iterators: AsyncGenerator[T, None], - is_cancelled: Optional[Callable[[], Awaitable[bool]]] = None, -) -> AsyncGenerator[Tuple[int, T], None]: + *iterators: AsyncGenerator[T, + None], ) -> AsyncGenerator[Tuple[int, T], None]: """Merge multiple asynchronous iterators into a single iterator. This method handle the case where some iterators finish before others. When it yields, it yields a tuple (i, item) where i is the index of the iterator that yields the item. - - It also optionally polls a provided function at least once per second - to check for client cancellation. 
""" loop = asyncio.get_running_loop() awaits = {_next_task(pair[1], loop): pair for pair in enumerate(iterators)} - timeout = None if is_cancelled is None else 1.5 - next_cancel_check: float = 0 try: while awaits: - done, pending = await asyncio.wait(awaits.keys(), - return_when=FIRST_COMPLETED, - timeout=timeout) - if is_cancelled is not None: - # Check for cancellation at most once per second - time_now = time.time() - if time_now >= next_cancel_check: - if await is_cancelled(): - raise asyncio.CancelledError("client cancelled") - next_cancel_check = time_now + 1 + done, _ = await asyncio.wait(awaits.keys(), + return_when=FIRST_COMPLETED) for d in done: pair = awaits.pop(d) try: From c77eb8a33ceb62858d951ffef87ae626a0d09973 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 17 Dec 2024 19:34:06 -0500 Subject: [PATCH 29/72] [Bugfix] Set temperature=0.7 in test_guided_choice_chat (#11264) --- tests/entrypoints/openai/test_chat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index 8d23a2be6f9bb..47c521a9b5eb5 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -482,6 +482,7 @@ async def test_guided_choice_chat(client: openai.AsyncOpenAI, model=MODEL_NAME, messages=messages, max_completion_tokens=10, + temperature=0.7, extra_body=dict(guided_choice=sample_guided_choice, guided_decoding_backend=guided_decoding_backend)) choice1 = chat_completion.choices[0].message.content @@ -496,6 +497,7 @@ async def test_guided_choice_chat(client: openai.AsyncOpenAI, model=MODEL_NAME, messages=messages, max_completion_tokens=10, + temperature=0.7, extra_body=dict(guided_choice=sample_guided_choice, guided_decoding_backend=guided_decoding_backend)) choice2 = chat_completion.choices[0].message.content From bf8717ebaea8d74279df84fbe127ad22cf62e219 Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Tue, 17 Dec 2024 16:37:59 -0800 Subject: [PATCH 30/72] [V1] Prefix caching for vision language models (#11187) Signed-off-by: Cody Yu --- tests/v1/core/test_prefix_caching.py | 88 +++++++++++++++++++- tests/v1/engine/test_engine_args.py | 15 ---- vllm/engine/arg_utils.py | 27 ++++--- vllm/inputs/data.py | 20 +++++ vllm/multimodal/inputs.py | 3 + vllm/v1/core/kv_cache_manager.py | 74 +++++++++++------ vllm/v1/core/kv_cache_utils.py | 115 ++++++++++++++++++++++++--- vllm/v1/core/scheduler.py | 2 + vllm/v1/engine/async_llm.py | 10 ++- vllm/v1/engine/core.py | 8 +- vllm/v1/engine/llm_engine.py | 9 ++- vllm/v1/engine/mm_input_mapper.py | 33 ++++---- vllm/v1/engine/processor.py | 12 +-- vllm/v1/request.py | 24 +++++- 14 files changed, 342 insertions(+), 98 deletions(-) diff --git a/tests/v1/core/test_prefix_caching.py b/tests/v1/core/test_prefix_caching.py index 00f7b0fcfe1dc..ed04f0a373c51 100644 --- a/tests/v1/core/test_prefix_caching.py +++ b/tests/v1/core/test_prefix_caching.py @@ -2,16 +2,23 @@ import pytest from vllm.inputs import token_inputs +from vllm.multimodal.inputs import PlaceholderRange from vllm.sampling_params import SamplingParams from vllm.utils import cdiv from vllm.v1.core.kv_cache_manager import KVCacheManager, Request from vllm.v1.core.kv_cache_utils import KVCacheBlock, hash_block_tokens -def make_request(request_id, prompt_token_ids): +def make_request(request_id, + prompt_token_ids, + mm_positions=None, + mm_hashes=None): return Request( request_id=request_id, - inputs=token_inputs(prompt_token_ids=prompt_token_ids), + 
inputs=token_inputs(prompt_token_ids=prompt_token_ids, + multi_modal_placeholders={"image": mm_positions} + if mm_positions else None, + multi_modal_hashes=mm_hashes), sampling_params=SamplingParams(max_tokens=17), eos_token_id=100, arrival_time=0, @@ -38,6 +45,7 @@ def test_prefill(): all_token_ids = common_token_ids + unique_token_ids req0 = make_request("0", all_token_ids) computed_blocks = manager.get_computed_blocks(req0) + assert len(req0.kv_block_hashes) == 3 assert not computed_blocks blocks = manager.allocate_slots(req0, 55, computed_blocks) assert [b.block_id for b in blocks] == [0, 1, 2, 3, 4] @@ -61,6 +69,7 @@ def test_prefill(): unique_token_ids = [3] * 5 req1 = make_request("1", common_token_ids + unique_token_ids) computed_blocks = manager.get_computed_blocks(req1) + assert len(req1.kv_block_hashes) == 3 assert [b.block_id for b in computed_blocks] == [0, 1, 2] num_new_tokens = 53 - 3 * 16 blocks = manager.allocate_slots(req1, num_new_tokens, computed_blocks) @@ -90,6 +99,7 @@ def test_prefill(): unique_token_ids = [3] * 6 req2 = make_request("2", common_token_ids + unique_token_ids) computed_block = manager.get_computed_blocks(req2) + assert len(req2.kv_block_hashes) == 3 assert [b.block_id for b in computed_block] == [0, 1, 2] num_new_tokens = 53 - 3 * 16 blocks = manager.allocate_slots(req2, num_new_tokens, computed_blocks) @@ -416,3 +426,77 @@ def test_cache_blocks(): ) assert len(manager.cached_block_hash_to_block) == 3 assert blocks[0].block_hash is not None + + +def test_mm_prefix_caching(): + """ + This tests that the multi-modal prefix caching is correct. + """ + manager = KVCacheManager( + block_size=16, + num_gpu_blocks=10, + max_model_len=8192, + sliding_window=None, + enable_caching=True, + num_preallocate_tokens=16, + ) + + # Common prompt tokens (T is text tokens and P is image placeholder tokens) + # [T,...,T, P0,...,P0], [P0,...,P0,T,...,T,P1,...,P1], [P1,...,P1] + common_token_ids = list(range(10)) + [-1] * 6 + common_token_ids += [-1] * 4 + list(range(10, 20)) + [-1] * 2 + common_token_ids += [-1] * 16 + + common_mm_positions = [ + PlaceholderRange(offset=11, length=10), + PlaceholderRange(offset=30, length=18), + ] + common_mm_hashes = ["aaa", "bbb"] + + # A unique image plus some text tokens. + unique_token_ids = [-1] * 7 + [100] * 4 + all_token_ids = common_token_ids + unique_token_ids + mm_positions = common_mm_positions + [ + PlaceholderRange(offset=48, length=7) + ] + mm_hashes = common_mm_hashes + ["ccc"] + req0 = make_request("0", + all_token_ids, + mm_positions=mm_positions, + mm_hashes=mm_hashes) + computed_blocks = manager.get_computed_blocks(req0) + + # Completed block should have hashes with extra keys. + assert not computed_blocks + assert len(req0.kv_block_hashes) == 3 + assert req0.kv_block_hashes[0].extra_keys == (("aaa", 0), ) + assert req0.kv_block_hashes[1].extra_keys == (("aaa", 5), ("bbb", 0)) + assert req0.kv_block_hashes[2].extra_keys == (("bbb", 2), ) + + blocks = manager.allocate_slots(req0, 59, computed_blocks) + assert [b.block_id for b in blocks] == [0, 1, 2, 3, 4] + req0.num_computed_tokens = 59 + + # Append slots without allocating a new block. + for _ in range(5): + req0.append_output_token_ids(8) + new_blocks = manager.append_slots(req0, 5) + assert new_blocks is not None and len(new_blocks) == 0 + + # The just completed block should have hashes with extra keys. + assert len(req0.kv_block_hashes) == 4 + assert req0.kv_block_hashes[3].extra_keys == (("ccc", 0), ) + + # Cache hit. 
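# (Illustrative note, not part of the patch: req1 below shares the first 48
# tokens with req0 and the same "aaa"/"bbb" image hashes covering them, so
# its first three 16-token blocks hash identically and are returned as
# computed blocks; only the still-partial fourth block differs.)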
+ unique_token_ids = [-1] * 7 + [200] * 5 + all_token_ids = common_token_ids + unique_token_ids + mm_positions = common_mm_positions + [ + PlaceholderRange(offset=48, length=7) + ] + mm_hashes = common_mm_hashes + ["ccc"] + req1 = make_request("1", + all_token_ids, + mm_positions=mm_positions, + mm_hashes=mm_hashes) + computed_blocks = manager.get_computed_blocks(req1) + assert len(computed_blocks) == 3 diff --git a/tests/v1/engine/test_engine_args.py b/tests/v1/engine/test_engine_args.py index ac5e7dde525a7..ff38a4568ecb1 100644 --- a/tests/v1/engine/test_engine_args.py +++ b/tests/v1/engine/test_engine_args.py @@ -31,14 +31,6 @@ def test_prefix_caching_from_cli(): assert engine_args.enable_prefix_caching -def test_defaults(): - engine_args = EngineArgs(model="facebook/opt-125m") - - # Assert V1 defaults - assert (engine_args.enable_prefix_caching - ), "V1 turns on prefix caching by default" - - def test_defaults_with_usage_context(): engine_args = EngineArgs(model="facebook/opt-125m") vllm_config: VllmConfig = engine_args.create_engine_config( @@ -52,10 +44,3 @@ def test_defaults_with_usage_context(): UsageContext.OPENAI_API_SERVER) assert vllm_config.scheduler_config.max_num_seqs == 1024 assert vllm_config.scheduler_config.max_num_batched_tokens == 2048 - - -def test_prefix_cache_disabled_with_multimodel(): - engine_args = EngineArgs(model="llava-hf/llava-1.5-7b-hf") - - vllm_config = engine_args.create_engine_config(UsageContext.LLM_CLASS) - assert not vllm_config.cache_config.enable_prefix_caching diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index f6d276fe7c0c8..674577f23eba6 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -205,6 +205,7 @@ def __post_init__(self): # by user. if self.enable_prefix_caching is None: self.enable_prefix_caching = bool(envs.VLLM_USE_V1) + # Override max_num_seqs if it's not set by user. if self.max_num_seqs is None: self.max_num_seqs = 256 if not envs.VLLM_USE_V1 else 1024 @@ -1026,11 +1027,11 @@ def create_engine_config(self, device_config = DeviceConfig(device=self.device) model_config = self.create_model_config() - if model_config.is_multimodal_model: - if self.enable_prefix_caching: - logger.warning( - "--enable-prefix-caching is currently not " - "supported for multimodal models and has been disabled.") + if (model_config.is_multimodal_model and not envs.VLLM_USE_V1 + and self.enable_prefix_caching): + logger.warning("--enable-prefix-caching is currently not " + "supported for multimodal models in v0 and " + "has been disabled.") self.enable_prefix_caching = False cache_config = CacheConfig( @@ -1249,11 +1250,14 @@ def _override_v1_engine_args(self, usage_context: UsageContext) -> None: # When no user override, set the default values based on the usage # context. # TODO(woosuk): Tune the default values for different hardware. 
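# --- Illustrative sketch (not part of the patch above) ---------------------
# The table-driven default only applies when the user left the value unset
# AND the usage context has an entry; unknown contexts are left as None.
# _UsageContext/OTHER below are simplified, hypothetical stand-ins.
from enum import Enum, auto
from typing import Optional

class _UsageContext(Enum):
    LLM_CLASS = auto()
    OPENAI_API_SERVER = auto()
    OTHER = auto()  # no default registered

_DEFAULTS = {
    _UsageContext.LLM_CLASS: 8192,
    _UsageContext.OPENAI_API_SERVER: 2048,
}

def _resolve(user_value: Optional[int], ctx: _UsageContext) -> Optional[int]:
    if user_value is None and ctx in _DEFAULTS:
        return _DEFAULTS[ctx]
    return user_value

assert _resolve(None, _UsageContext.OPENAI_API_SERVER) == 2048
assert _resolve(None, _UsageContext.OTHER) is None      # stays unset
assert _resolve(4096, _UsageContext.LLM_CLASS) == 4096  # user override wins
# ---------------------------------------------------------------------------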
- if self.max_num_batched_tokens is None: - if usage_context == UsageContext.LLM_CLASS: - self.max_num_batched_tokens = 8192 - elif usage_context == UsageContext.OPENAI_API_SERVER: - self.max_num_batched_tokens = 2048 + default_max_num_batched_tokens = { + UsageContext.LLM_CLASS: 8192, + UsageContext.OPENAI_API_SERVER: 2048, + } + if (self.max_num_batched_tokens is None + and usage_context in default_max_num_batched_tokens): + self.max_num_batched_tokens = default_max_num_batched_tokens[ + usage_context] logger.warning( "Setting max_num_batched_tokens to %d for %s usage context.", self.max_num_batched_tokens, usage_context.value) @@ -1263,9 +1267,6 @@ def _override_v1_engine_config(self, engine_config: VllmConfig) -> None: Override the EngineConfig's configs based on the usage context for V1. """ assert envs.VLLM_USE_V1, "V1 is not enabled" - if engine_config.model_config.is_multimodal_model: - # TODO (ywang96): Enable APC by default when VLM supports it. - assert not engine_config.cache_config.enable_prefix_caching @dataclass diff --git a/vllm/inputs/data.py b/vllm/inputs/data.py index 85aaaa776907f..d54cbb5c37819 100644 --- a/vllm/inputs/data.py +++ b/vllm/inputs/data.py @@ -162,6 +162,11 @@ class TokenInputs(TypedDict): Placeholder ranges for the multi-modal data. """ + multi_modal_hashes: NotRequired[List[str]] + """ + The hashes of the multi-modal data. + """ + mm_processor_kwargs: NotRequired[Dict[str, Any]] """ Optional multi-modal processor kwargs to be forwarded to the @@ -177,6 +182,7 @@ def token_inputs( prompt: Optional[str] = None, multi_modal_data: Optional["MultiModalDataDict"] = None, multi_modal_inputs: Optional["MultiModalKwargs"] = None, + multi_modal_hashes: Optional[List[str]] = None, multi_modal_placeholders: Optional["MultiModalPlaceholderDict"] = None, mm_processor_kwargs: Optional[Dict[str, Any]] = None, ) -> TokenInputs: @@ -191,6 +197,8 @@ def token_inputs( inputs["multi_modal_data"] = multi_modal_data if multi_modal_inputs is not None: inputs["multi_modal_inputs"] = multi_modal_inputs + if multi_modal_hashes is not None: + inputs["multi_modal_hashes"] = multi_modal_hashes if multi_modal_placeholders is not None: inputs["multi_modal_placeholders"] = multi_modal_placeholders if mm_processor_kwargs is not None: @@ -295,6 +303,18 @@ def multi_modal_inputs(self) -> Union[Dict, "MultiModalKwargs"]: assert_never(inputs) + @cached_property + def multi_modal_hashes(self) -> List[str]: + inputs = self.inputs + + if inputs["type"] == "token": + return inputs.get("multi_modal_hashes", []) + + if inputs["type"] == "multimodal": + return inputs.get("mm_hashes", []) + + assert_never(inputs) + @cached_property def multi_modal_placeholders(self) -> "MultiModalPlaceholderDict": inputs = self.inputs diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py index 229a8fbdf5831..c00943a5f26d9 100644 --- a/vllm/multimodal/inputs.py +++ b/vllm/multimodal/inputs.py @@ -215,6 +215,9 @@ class MultiModalInputsV2(TypedDict): mm_kwargs: MultiModalKwargs """Keyword arguments to be directly passed to the model after batching.""" + mm_hashes: NotRequired[List[str]] + """The hashes of the multi-modal data.""" + mm_placeholders: MultiModalPlaceholderDict """ For each modality, information about the placeholder tokens in diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index aaa44c930e324..61a3f5fd6d841 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -4,7 +4,9 @@ from vllm.logger import init_logger from vllm.utils 
import cdiv from vllm.v1.core.kv_cache_utils import (BlockHashType, FreeKVCacheBlockQueue, - KVCacheBlock, hash_block_tokens, + KVCacheBlock, + generate_block_hash_extra_keys, + hash_block_tokens, hash_request_tokens) from vllm.v1.request import Request @@ -83,10 +85,12 @@ def get_computed_blocks(self, request: Request) -> List[KVCacheBlock]: computed_blocks = [] - # TODO(rickyx): potentially we could cache this so we don't have to - # recompute it every time. - block_hashes = hash_request_tokens(self.block_size, - request.all_token_ids) + # The block hashes for the request may already be computed + # if the request was preempted and resumed. + if not request.kv_block_hashes: + request.set_kv_block_hashes( + hash_request_tokens(self.block_size, request)) + block_hashes = request.kv_block_hashes for block_hash in block_hashes: # block_hashes is a chain of block hashes. If a block hash is not @@ -242,14 +246,16 @@ def allocate_slots( num_computed_tokens = len(computed_blocks) * self.block_size num_full_blocks = (num_computed_tokens + num_tokens) // self.block_size - self._cache_full_blocks( - request=request, - blk_start_idx=len(computed_blocks), - # The new full blocks are the full blocks that are not computed. - full_blocks=self.req_to_blocks[request.request_id] - [len(computed_blocks):num_full_blocks], - prev_block=computed_blocks[-1] if computed_blocks else None, - ) + new_full_blocks = self.req_to_blocks[ + request.request_id][len(computed_blocks):num_full_blocks] + if new_full_blocks: + self._cache_full_blocks( + request=request, + blk_start_idx=len(computed_blocks), + # The new full blocks are the full blocks that are not computed. + full_blocks=new_full_blocks, + prev_block=computed_blocks[-1] if computed_blocks else None, + ) return new_blocks @@ -376,6 +382,8 @@ def _cache_full_blocks( full_blocks: The list of blocks to update hash metadata. prev_block: The previous block in the chain. """ + num_cached_block_hashes = len(request.kv_block_hashes) + # Update the new blocks with the block hashes through the chain. prev_block_hash_value = None if prev_block is not None: @@ -387,17 +395,35 @@ def _cache_full_blocks( for i, blk in enumerate(full_blocks): blk_idx = blk_start_idx + i - block_tokens = request.all_token_ids[blk_idx * - self.block_size:(blk_idx + - 1) * - self.block_size] - assert len(block_tokens) == self.block_size, ( - f"Expected {self.block_size} tokens, got {len(block_tokens)} " - f"at {blk_idx}th block for request " - f"{request.request_id}({request})") - - # Compute the hash of the current block. - block_hash = hash_block_tokens(prev_block_hash_value, block_tokens) + if blk_idx < num_cached_block_hashes: + # The block hash may already be computed in + # "get_computed_blocks" if the tokens are not generated by + # this request (either the prompt tokens or the previously + # generated tokens with preemption). In this case we simply + # reuse the block hash. + block_hash = request.kv_block_hashes[blk_idx] + else: + # Otherwise compute the block hash and cache it in the request + # in case it will be preempted in the future. + start_token_idx = blk_idx * self.block_size + end_token_idx = (blk_idx + 1) * self.block_size + block_tokens = request.all_token_ids[ + start_token_idx:end_token_idx] + assert len(block_tokens) == self.block_size, ( + f"Expected {self.block_size} tokens, got " + f"{len(block_tokens)} at {blk_idx}th block for request " + f"{request.request_id}({request})") + + # Generate extra keys for multi-modal inputs. 
Note that since + # we reach to this branch only when the block is completed with + # generated tokens, we only need to consider the last mm input. + extra_keys, _ = generate_block_hash_extra_keys( + request, start_token_idx, end_token_idx, -1) + + # Compute the hash of the current block. + block_hash = hash_block_tokens(prev_block_hash_value, + block_tokens, extra_keys) + request.append_kv_block_hashes(block_hash) # Update and added the full block to the cache. blk.block_hash = block_hash diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index 0ba338aa5a3d2..d80ea128c7749 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -1,20 +1,25 @@ """KV-Cache Utilities.""" from collections.abc import Sequence from dataclasses import dataclass -from typing import List, NamedTuple, Optional, Tuple +from typing import Any, List, NamedTuple, Optional, Tuple from vllm.logger import init_logger +from vllm.v1.request import Request logger = init_logger(__name__) class BlockHashType(NamedTuple): - """Hash value of a block and the token IDs in the block. - The reason we keep a tuple of token IDs is to make sure no hash - collision happens when the hash value is the same. + """Hash value of a block (int), the token IDs in the block, and extra keys. + The reason we keep a tuple of token IDs and extra keys is to make sure + no hash collision happens when the hash value is the same. """ + # Hash value of the block in an integer. hash_value: int + # Token IDs in the block. token_ids: Tuple[int, ...] + # Extra keys for the block. + extra_keys: Optional[Any] = None @dataclass @@ -159,8 +164,80 @@ def get_all_free_blocks(self) -> List[KVCacheBlock]: return ret -def hash_block_tokens(parent_block_hash: Optional[int], - curr_block_token_ids: Sequence[int]) -> BlockHashType: +def generate_block_hash_extra_keys( + request: Request, start_token_idx: int, end_token_idx: int, + start_mm_idx: int) -> Tuple[Optional[Tuple[Any, ...]], int]: + """Generate extra keys for the block hash. The extra keys can come from + the multi-modal inputs and request specific metadata (e.g., LoRA ID). + For multi-modal inputs, the extra keys are (mm_hash, start_offset) that + indicate a mm input contained in the block and its starting offset in + the block tokens. + + Args: + request: The request object. + start_token_idx: The start token index of the block. + end_token_idx: The end token index of the block. + start_mm_idx: The start multi-modal index of the block. + + Returns: + A tuple of extra keys and the next multi-modal index. + """ + + mm_positions, mm_hashes = request.mm_positions, request.mm_hashes + if not mm_positions: + return None, start_mm_idx + + if mm_positions and len(mm_positions) != len(mm_hashes): + raise ValueError( + "The number of multi-modal positions and hashes must match. This " + "is likely because you do not enable MM preprocessor hashing. " + "Please set mm_cache_preprocessor=True.") + + # Note that we assume mm_positions is sorted by offset. + # We do not need to check all mm inputs if the start token index is out of + # range. This usually happens in the late prefill phase and decoding phase. + if mm_positions[-1]["offset"] + mm_positions[-1][ + "length"] < start_token_idx: + return None, start_mm_idx + + # Support start_mm_idx == -1 to indicate the last mm input. 
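# --- Worked example (illustrative comment, not part of the patch) ----------
# With block_size=16, image "aaa" at offset 11 (length 10) and image "bbb"
# at offset 30 (length 18) -- the same layout as the unit test above -- the
# extra keys produced per full block are:
#   block 0 (tokens  0..15): (("aaa", 0),)             # "aaa" starts inside it
#   block 1 (tokens 16..31): (("aaa", 5), ("bbb", 0))  # tail of "aaa", head of "bbb"
#   block 2 (tokens 32..47): (("bbb", 2),)
# Each key records which mm input overlaps the block and where in that input
# the block starts, so identical token IDs backed by different images (or a
# different alignment of the same image) map to different cache entries.
# ---------------------------------------------------------------------------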
+ if start_mm_idx < 0: + assert -start_mm_idx <= len(mm_positions) + start_mm_idx = len(mm_positions) + start_mm_idx + + extra_keys = [] + curr_mm_idx = start_mm_idx + while mm_positions and curr_mm_idx < len(mm_positions): + assert mm_hashes[curr_mm_idx] is not None + offset = mm_positions[curr_mm_idx]["offset"] + length = mm_positions[curr_mm_idx]["length"] + if end_token_idx > offset: + if start_token_idx > offset + length: + # This block has passed the current mm input. + curr_mm_idx += 1 + continue + + # The block contains the current mm input. + mm_start = max(0, start_token_idx - offset) + extra_keys.append((mm_hashes[curr_mm_idx], mm_start)) + if end_token_idx >= offset + length: + # If this block contains the end of the current mm input, + # move to the next mm input as this block may also contain + # the next mm input. + curr_mm_idx += 1 + else: + # Otherwise this block is done with mm inputs. + break + else: + # This block has not reached the current mm input. + break + return tuple(extra_keys), curr_mm_idx + + +def hash_block_tokens( + parent_block_hash: Optional[int], + curr_block_token_ids: Sequence[int], + extra_keys: Optional[Tuple[Any, ...]] = None) -> BlockHashType: """Computes a hash value corresponding to the contents of a block and the contents of the preceding block(s). The hash value is used for prefix caching. We use LRU cache for this function to avoid recomputing @@ -174,27 +251,39 @@ def hash_block_tokens(parent_block_hash: Optional[int], if this is the first block. curr_block_token_ids: A list of token ids in the current block. The current block is assumed to be full. + extra_keys: Extra keys for the block. Returns: The hash value of the block and the token ids in the block. The entire tuple is used as the hash key of the block. """ return BlockHashType(hash((parent_block_hash, *curr_block_token_ids)), - tuple(curr_block_token_ids)) + tuple(curr_block_token_ids), extra_keys) def hash_request_tokens(block_size: int, - token_ids: Sequence[int]) -> List[BlockHashType]: + request: Request) -> List[BlockHashType]: """Computes hash values of a chain of blocks given a sequence of token IDs. The hash value is used for prefix caching. Args: block_size: The size of each block. - token_ids: A sequence of token ids in the request. + request: The request object. Returns: The list of computed hash values. """ + token_ids = request.all_token_ids + mm_positions, mm_hashes = request.mm_positions, request.mm_hashes + if mm_positions and len(mm_positions) != len(mm_hashes): + raise ValueError( + "The number of multi-modal positions and hashes must match.") + + # TODO: Extend this to support other features such as LoRA. + need_extra_keys = bool(mm_positions) + extra_keys = None + curr_mm_idx = 0 + ret = [] parent_block_hash_value = None for start in range(0, len(token_ids), block_size): @@ -203,8 +292,14 @@ def hash_request_tokens(block_size: int, # Do not hash the block if it is not full. if len(block_token_ids) < block_size: break + + # Add extra keys if the block is a multi-modal block. 
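        # A minimal sketch of the chaining (hypothetical token IDs, toy
        # block_size of 4, no multi-modal inputs):
        #   h0 = hash_block_tokens(None, (1, 2, 3, 4))            # first block
        #   h1 = hash_block_tokens(h0.hash_value, (5, 6, 7, 8))   # chained
        #   h2 = hash_block_tokens(h0.hash_value, (5, 6, 7, 8),
        #                          extra_keys=(("img0", 0),))
        # h1 and h2 share parent and token IDs but differ in extra_keys; since
        # the entire BlockHashType tuple is used as the cache key, they are
        # distinct keys even though only (parent_hash, *token_ids) feeds the
        # integer hash_value.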
+ if need_extra_keys: + extra_keys, curr_mm_idx = generate_block_hash_extra_keys( + request, start, end, curr_mm_idx) + block_hash = hash_block_tokens(parent_block_hash_value, - block_token_ids) + block_token_ids, extra_keys) ret.append(block_hash) parent_block_hash_value = block_hash.hash_value return ret diff --git a/vllm/v1/core/scheduler.py b/vllm/v1/core/scheduler.py index 178532e477dae..08e7c0fd4dc9b 100644 --- a/vllm/v1/core/scheduler.py +++ b/vllm/v1/core/scheduler.py @@ -516,6 +516,7 @@ class NewRequestData: prompt_token_ids: List[int] prompt: Optional[str] mm_inputs: List["MultiModalKwargs"] + mm_hashes: List[str] mm_positions: List["PlaceholderRange"] sampling_params: SamplingParams block_ids: List[int] @@ -533,6 +534,7 @@ def from_request( prompt_token_ids=request.prompt_token_ids, prompt=request.prompt, mm_inputs=request.mm_inputs, + mm_hashes=request.mm_hashes, mm_positions=request.mm_positions, sampling_params=request.sampling_params, block_ids=block_ids, diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index b36de5f66917c..41fb4b25d45bb 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -60,9 +60,13 @@ def __init__( self.client_aborted_requests: List[str] = [] # Processor (converts Inputs --> EngineCoreRequests). - self.processor = Processor(vllm_config.model_config, - vllm_config.lora_config, self.tokenizer, - input_registry) + self.processor = Processor( + model_config=vllm_config.model_config, + cache_config=vllm_config.cache_config, + lora_config=vllm_config.lora_config, + tokenizer=self.tokenizer, + input_registry=input_registry, + ) # Detokenizer (converts EngineCoreOutputs --> RequestOutput). self.detokenizer = Detokenizer( diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 56d4dc67e4a0e..497d5db5b4c99 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -65,7 +65,8 @@ def __init__( self._last_logging_time = time.time() - self.mm_input_mapper_server = MMInputMapperServer() + self.mm_input_mapper_server = MMInputMapperServer( + vllm_config.model_config) def _initialize_kv_caches(self, cache_config: CacheConfig) -> Tuple[int, int]: @@ -98,9 +99,8 @@ def add_request(self, request: EngineCoreRequest): # MM mapper, so anything that has a hash must have a HIT cache # entry here as well. 
assert request.mm_inputs is not None - request.mm_inputs, request.mm_hashes = ( - self.mm_input_mapper_server.process_inputs( - request.mm_inputs, request.mm_hashes)) + request.mm_inputs = self.mm_input_mapper_server.process_inputs( + request.mm_inputs, request.mm_hashes) req = Request.from_engine_core_request(request) diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index 15dedbd0f9529..bea8c5502f612 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -55,9 +55,12 @@ def __init__( self.tokenizer.ping() # Processor (convert Inputs --> EngineCoreRequests) - self.processor = Processor(vllm_config.model_config, - vllm_config.lora_config, self.tokenizer, - input_registry, mm_registry) + self.processor = Processor(model_config=vllm_config.model_config, + cache_config=vllm_config.cache_config, + lora_config=vllm_config.lora_config, + tokenizer=self.tokenizer, + input_registry=input_registry, + mm_registry=mm_registry) # Detokenizer (converts EngineCoreOutputs --> RequestOutput) self.detokenizer = Detokenizer( diff --git a/vllm/v1/engine/mm_input_mapper.py b/vllm/v1/engine/mm_input_mapper.py index 6cdeba6f3f71e..e53ba092ede04 100644 --- a/vllm/v1/engine/mm_input_mapper.py +++ b/vllm/v1/engine/mm_input_mapper.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional import PIL from blake3 import blake3 @@ -42,6 +42,8 @@ def __init__( model_config) self.mm_registry.init_mm_limits_per_prompt(model_config) + # Init cache + self.use_cache = model_config.mm_cache_preprocessor self.mm_cache = LRUDictCache[str, MultiModalKwargs](MM_CACHE_SIZE) # DEBUG: Set to None to disable @@ -61,7 +63,7 @@ def process_inputs( mm_hashes: Optional[List[str]], mm_processor_kwargs: Optional[Dict[str, Any]], precomputed_mm_inputs: Optional[List[MultiModalKwargs]], - ) -> Tuple[List[MultiModalKwargs], Optional[List[str]]]: + ) -> List[MultiModalKwargs]: if precomputed_mm_inputs is None: image_inputs = mm_data["image"] if not isinstance(image_inputs, list): @@ -70,26 +72,21 @@ def process_inputs( else: num_inputs = len(precomputed_mm_inputs) - # Check if hash is enabled - use_hash = mm_hashes is not None - if use_hash: + # Sanity + if self.use_cache: assert mm_hashes is not None - assert num_inputs == len( - mm_hashes), "num_inputs = {} len(mm_hashes) = {}".format( - num_inputs, len(mm_hashes)) + assert num_inputs == len(mm_hashes) # Process each image input separately, so that later we can schedule # them in a fine-grained manner. 
# Apply caching (if enabled) and reuse precomputed inputs (if provided) - ret_hashes: Optional[List[str]] = [] if use_hash else None ret_inputs: List[MultiModalKwargs] = [] for input_id in range(num_inputs): if self.mm_debug_cache_hit_ratio_steps is not None: self.cache_hit_ratio(self.mm_debug_cache_hit_ratio_steps) - mm_hash = None mm_input = None - if use_hash: + if self.use_cache: assert mm_hashes is not None mm_hash = mm_hashes[input_id] mm_input = self.mm_cache.get(mm_hash) @@ -106,7 +103,7 @@ def process_inputs( mm_processor_kwargs=mm_processor_kwargs, ) - if use_hash: + if self.use_cache: # Add to cache assert mm_hash is not None self.mm_cache.put(mm_hash, mm_input) @@ -114,18 +111,15 @@ def process_inputs( self.mm_cache_hits += 1 mm_input = None # Avoids sending mm_input to Server - if use_hash: - assert mm_hash is not None - assert ret_hashes is not None - ret_hashes.append(mm_hash) ret_inputs.append(mm_input) - return ret_inputs, ret_hashes + return ret_inputs class MMInputMapperServer: - def __init__(self, ): + def __init__(self, model_config): + self.use_cache = model_config.mm_cache_preprocessor self.mm_cache = LRUDictCache[str, MultiModalKwargs](MM_CACHE_SIZE) def process_inputs( @@ -135,6 +129,9 @@ def process_inputs( ) -> List[MultiModalKwargs]: assert len(mm_inputs) == len(mm_hashes) + if not self.use_cache: + return mm_inputs + full_mm_inputs = [] for mm_input, mm_hash in zip(mm_inputs, mm_hashes): assert mm_hash is not None diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index 679bf8e25e9ca..732757d6b0ac2 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -1,7 +1,7 @@ import time from typing import Any, Dict, Mapping, Optional, Tuple, Union -from vllm.config import LoRAConfig, ModelConfig +from vllm.config import CacheConfig, LoRAConfig, ModelConfig from vllm.inputs import (INPUT_REGISTRY, InputRegistry, ProcessorInputs, PromptType, SingletonInputsAdapter) from vllm.inputs.parse import is_encoder_decoder_inputs @@ -23,6 +23,7 @@ class Processor: def __init__( self, model_config: ModelConfig, + cache_config: CacheConfig, lora_config: Optional[LoRAConfig], tokenizer: BaseTokenizerGroup, input_registry: InputRegistry = INPUT_REGISTRY, @@ -45,8 +46,9 @@ def __init__( self.mm_input_mapper_client = MMInputMapperClient(model_config) # Multi-modal hasher (for images) - self.mm_hasher = MMHasher( - ) if model_config.mm_cache_preprocessor else None + self.use_hash = model_config.mm_cache_preprocessor or \ + cache_config.enable_prefix_caching + self.mm_hasher = MMHasher() # TODO: run in an ThreadpoolExecutor or BackgroundProcess. # This ideally should releases the GIL, so we should not block the @@ -77,7 +79,7 @@ def process_inputs( # Compute MM hashes (if enabled) mm_hashes = None - if self.mm_hasher is not None: + if self.use_hash: mm_hashes = self.mm_hasher.hash(prompt) # Process inputs. 
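# A rough sketch of the resulting preprocessor-cache handshake when the
# preprocessor cache (mm_cache_preprocessor) is enabled; descriptive
# pseudocode, not actual vLLM identifiers:
#
#   MMInputMapperClient.process_inputs (in the process running Processor):
#       mm_input = mm_cache.get(mm_hash)
#       if mm_input is None:                 # miss: map, cache, and send it
#           mm_input = <run the input mapper>
#           mm_cache.put(mm_hash, mm_input)
#       else:                                # hit: send None instead; the
#           mm_input = None                  # core's mirrored cache has it
#
#   MMInputMapperServer.process_inputs (in the process running EngineCore):
#       if mm_input is None: reuse its own mm_cache[mm_hash]
#       else:                store mm_input under mm_hash for later hits
#
# Both sides hold an LRUDictCache bounded by MM_CACHE_SIZE and keyed by the
# same hash string, which is why EngineCore.add_request can assume that every
# hashed input is already a hit on the server side.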
@@ -118,7 +120,7 @@ def process_inputs( # Apply MM mapper mm_inputs = None if len(decoder_inputs.multi_modal_data) > 0: - mm_inputs, mm_hashes = self.mm_input_mapper_client.process_inputs( + mm_inputs = self.mm_input_mapper_client.process_inputs( decoder_inputs.multi_modal_data, mm_hashes, decoder_inputs.mm_processor_kwargs, precomputed_mm_inputs) diff --git a/vllm/v1/request.py b/vllm/v1/request.py index 1737d096e811d..f4783ae366ef0 100644 --- a/vllm/v1/request.py +++ b/vllm/v1/request.py @@ -1,5 +1,5 @@ import enum -from typing import List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union from vllm.inputs import DecoderOnlyInputs, SingletonInputsAdapter, token_inputs from vllm.lora.request import LoRARequest @@ -9,6 +9,9 @@ from vllm.v1.engine import EngineCoreRequest from vllm.v1.utils import ConstantList +if TYPE_CHECKING: + from vllm.v1.core.kv_cache_utils import BlockHashType + class Request: @@ -45,6 +48,7 @@ def __init__( self._all_token_ids: List[int] = self.prompt_token_ids.copy() self.num_computed_tokens = 0 + # Multi-modal input metadata. mm_positions = self.inputs.multi_modal_placeholders if mm_positions: # FIXME(woosuk): Support other modalities. @@ -56,6 +60,12 @@ def __init__( if self.inputs.multi_modal_inputs: self.mm_inputs = self.inputs.multi_modal_inputs + self.mm_hashes: List[str] = self.inputs.multi_modal_hashes + + # Cache the computed kv block hashes of the request to avoid + # recomputing. + self._kv_block_hashes: List[BlockHashType] = [] + @classmethod def from_engine_core_request(cls, request: EngineCoreRequest) -> "Request": return cls( @@ -65,6 +75,7 @@ def from_engine_core_request(cls, request: EngineCoreRequest) -> "Request": prompt=request.prompt, multi_modal_data=None, multi_modal_inputs=request.mm_inputs, + multi_modal_hashes=request.mm_hashes, multi_modal_placeholders=request.mm_placeholders, mm_processor_kwargs=None, ), @@ -121,6 +132,17 @@ def get_num_encoder_tokens(self, input_id: int) -> int: num_tokens = self.mm_positions[input_id]["length"] return num_tokens + @property + def kv_block_hashes(self) -> ConstantList["BlockHashType"]: + # Prevent directly appending to the kv_block_hashes. + return ConstantList(self._kv_block_hashes) + + def set_kv_block_hashes(self, value: List["BlockHashType"]) -> None: + self._kv_block_hashes = value + + def append_kv_block_hashes(self, block_hash: "BlockHashType") -> None: + self._kv_block_hashes.append(block_hash) + class RequestStatus(enum.IntEnum): """Status of a request.""" From 866fa4550d572f4ff3521ccf503e0df2e76591a1 Mon Sep 17 00:00:00 2001 From: Konrad Zawora Date: Wed, 18 Dec 2024 01:39:07 +0100 Subject: [PATCH 31/72] [Bugfix] Restore support for larger block sizes (#11259) Signed-off-by: Konrad Zawora --- vllm/config.py | 4 ++++ vllm/engine/arg_utils.py | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 9ecd3e72afa9f..307cf9c8d5b2a 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -917,6 +917,10 @@ def _verify_args(self) -> None: raise ValueError( "GPU memory utilization must be less than 1.0. Got " f"{self.gpu_memory_utilization}.") + if (current_platform.is_cuda() and self.block_size is not None + and self.block_size > 32): + raise ValueError("CUDA Paged Attention kernel only supports " + f"block sizes up to 32. 
Got {self.block_size}.") def _verify_cache_dtype(self) -> None: if self.cache_dtype == "auto": diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 674577f23eba6..64cc4592c2861 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -424,10 +424,12 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: parser.add_argument('--block-size', type=int, default=EngineArgs.block_size, - choices=[8, 16, 32], + choices=[8, 16, 32, 64, 128], help='Token block size for contiguous chunks of ' 'tokens. This is ignored on neuron devices and ' - 'set to max-model-len') + 'set to max-model-len. On CUDA devices, ' + 'only block sizes up to 32 are supported. ' + 'On HPU devices, block size defaults to 128.') parser.add_argument( "--enable-prefix-caching", From 8b79f9e107fd4214187bf65485b3ea1bb3191a46 Mon Sep 17 00:00:00 2001 From: Wallas Henrique Date: Wed, 18 Dec 2024 03:34:08 -0300 Subject: [PATCH 32/72] [Bugfix] Fix guided decoding with tokenizer mode mistral (#11046) --- .buildkite/test-pipeline.yaml | 6 +- requirements-common.txt | 3 +- .../model_executor/test_guided_processors.py | 54 ++++++++- .../decoder_only/language/test_mistral.py | 86 ++++++++++++- .../guided_decoding/xgrammar_decoding.py | 113 +++++++++++------- vllm/transformers_utils/tokenizer.py | 2 +- vllm/transformers_utils/tokenizers/mistral.py | 5 +- 7 files changed, 217 insertions(+), 52 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 44f47fac1c1b3..b563c96343f92 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -224,8 +224,12 @@ steps: mirror_hardwares: [amd] source_file_dependencies: - vllm/model_executor/layers + - vllm/model_executor/guided_decoding - tests/test_logits_processor - command: pytest -v -s test_logits_processor.py + - tests/model_executor/test_guided_processors + commands: + - pytest -v -s test_logits_processor.py + - pytest -v -s model_executor/test_guided_processors.py - label: Speculative decoding tests # 30min source_file_dependencies: diff --git a/requirements-common.txt b/requirements-common.txt index bd2b4b7a01668..1c935303c8d79 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -14,12 +14,13 @@ aiohttp openai >= 1.45.0 # Ensure modern openai package (ensure types module present and max_completion_tokens field support) uvicorn[standard] pydantic >= 2.9 # Required for fastapi >= 0.113.0 -pillow # Required for image processing prometheus_client >= 0.18.0 +pillow # Required for image processing prometheus-fastapi-instrumentator >= 7.0.0 tiktoken >= 0.6.0 # Required for DBRX tokenizer lm-format-enforcer >= 0.10.9, < 0.11 outlines == 0.1.11 +lark == 1.2.2 xgrammar >= 0.1.6; platform_machine == "x86_64" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 diff --git a/tests/model_executor/test_guided_processors.py b/tests/model_executor/test_guided_processors.py index 9f4d81b583141..3334c0df149b5 100644 --- a/tests/model_executor/test_guided_processors.py +++ b/tests/model_executor/test_guided_processors.py @@ -1,13 +1,19 @@ +import pickle + import pytest import torch from transformers import AutoTokenizer +from vllm.config import ModelConfig from vllm.model_executor.guided_decoding import ( - get_guided_decoding_logits_processor) + get_guided_decoding_logits_processor, + get_local_guided_decoding_logits_processor) from vllm.model_executor.guided_decoding.outlines_logits_processors import ( JSONLogitsProcessor, 
RegexLogitsProcessor) from vllm.sampling_params import GuidedDecodingParams +MODEL_NAME = 'HuggingFaceH4/zephyr-7b-beta' + def test_guided_logits_processors(sample_regex, sample_json_schema): """Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor.""" @@ -38,14 +44,29 @@ def test_guided_logits_processors(sample_regex, sample_json_schema): @pytest.mark.asyncio @pytest.mark.parametrize("backend", ["outlines", "lm-format-enforcer", "xgrammar"]) -async def test_guided_logits_processor_black_box(backend: str, sample_regex, +@pytest.mark.parametrize("is_local", [True, False]) +async def test_guided_logits_processor_black_box(backend: str, is_local: bool, + sample_regex, sample_json_schema): - tokenizer = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta') + + config = ModelConfig( + MODEL_NAME, + task="generate", + tokenizer=MODEL_NAME, + tokenizer_mode="auto", + trust_remote_code=False, + seed=0, + dtype="bfloat16", + ) + tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) token_ids = tokenizer.encode( f"Give an example IPv4 address with this regex: {sample_regex}") regex_request = GuidedDecodingParams(regex=sample_regex, backend=backend) - regex_lp = await get_guided_decoding_logits_processor( - regex_request, tokenizer) + + regex_lp = get_local_guided_decoding_logits_processor( + regex_request, tokenizer, config) if is_local else \ + await get_guided_decoding_logits_processor( + regex_request, tokenizer, config) assert regex_lp is not None tensor = torch.rand(32000) original_tensor = torch.clone(tensor) @@ -59,7 +80,7 @@ async def test_guided_logits_processor_black_box(backend: str, sample_regex, json_request = GuidedDecodingParams(json=sample_json_schema, backend=backend) json_lp = await get_guided_decoding_logits_processor( - json_request, tokenizer) + json_request, tokenizer, config) assert json_lp is not None tensor = torch.rand(32000) original_tensor = torch.clone(tensor) @@ -84,3 +105,24 @@ def test_multiple_guided_options_not_allowed(sample_json_schema, sample_regex): with pytest.raises(ValueError, match="You can only use one kind of guided"): GuidedDecodingParams(json=sample_json_schema, grammar="test grammar") + + +def test_pickle_xgrammar_tokenizer_data(): + + # TODO: move to another test file for xgrammar + try: + import xgrammar as xgr + except ImportError: + pytest.skip("Could not import xgrammar to run test") + + from vllm.model_executor.guided_decoding.xgrammar_decoding import ( + TokenizerData) + tokenizer_data = TokenizerData(vocab_type=xgr.VocabType.RAW) + pickled = pickle.dumps(tokenizer_data) + + assert pickled is not None + + depickled: TokenizerData = pickle.loads(pickled) + + assert depickled is not None + assert depickled.vocab_type == xgr.VocabType.RAW diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py index 99b5d5694f9f7..bdc1571784b5d 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/decoder_only/language/test_mistral.py @@ -3,17 +3,20 @@ Run `pytest tests/models/test_mistral.py`. 
""" import copy +import json +import jsonschema +import jsonschema.exceptions import pytest -from vllm import SamplingParams from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import ( # noqa MistralToolParser) +from vllm.sampling_params import GuidedDecodingParams, SamplingParams from ...utils import check_logprobs_close MODELS = [ - "mistralai/Mistral-7B-Instruct-v0.1", + "mistralai/Mistral-7B-Instruct-v0.3", ] MISTRAL_FORMAT_MODELS = [ @@ -126,6 +129,45 @@ } ] +SAMPLE_JSON_SCHEMA = { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "age": { + "type": "integer" + }, + "skills": { + "type": "array", + "items": { + "type": "string", + "maxLength": 10 + }, + "minItems": 3 + }, + "work_history": { + "type": "array", + "items": { + "type": "object", + "properties": { + "company": { + "type": "string" + }, + "duration": { + "type": "number" + }, + "position": { + "type": "string" + } + }, + "required": ["company", "position"] + } + } + }, + "required": ["name", "age", "skills", "work_history"] +} + @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) @@ -251,3 +293,43 @@ def test_mistral_function_calling( assert parsed_message.tool_calls[ 0].function.arguments == '{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}' # noqa assert parsed_message.content is None + + +@pytest.mark.parametrize("model", MODELS) +@pytest.mark.parametrize("guided_backend", + ["outlines", "lm-format-enforcer", "xgrammar"]) +def test_mistral_guided_decoding( + vllm_runner, + model: str, + guided_backend: str, +) -> None: + with vllm_runner(model, dtype='bfloat16', + tokenizer_mode="mistral") as vllm_model: + + guided_decoding = GuidedDecodingParams(json=SAMPLE_JSON_SCHEMA, + backend=guided_backend) + params = SamplingParams(max_tokens=512, + temperature=0.7, + guided_decoding=guided_decoding) + + messages = [{ + "role": "system", + "content": "you are a helpful assistant" + }, { + "role": + "user", + "content": + f"Give an example JSON for an employee profile that " + f"fits this schema: {SAMPLE_JSON_SCHEMA}" + }] + outputs = vllm_model.model.chat(messages, sampling_params=params) + + generated_text = outputs[0].outputs[0].text + json_response = json.loads(generated_text) + assert outputs is not None + + try: + jsonschema.validate(instance=json_response, + schema=SAMPLE_JSON_SCHEMA) + except jsonschema.exceptions.ValidationError: + pytest.fail("Generated response is not valid with JSON schema") diff --git a/vllm/model_executor/guided_decoding/xgrammar_decoding.py b/vllm/model_executor/guided_decoding/xgrammar_decoding.py index fc45e37cf6f06..5b97f03257502 100644 --- a/vllm/model_executor/guided_decoding/xgrammar_decoding.py +++ b/vllm/model_executor/guided_decoding/xgrammar_decoding.py @@ -3,7 +3,7 @@ import json from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any, NamedTuple +from typing import TYPE_CHECKING, Any import torch from transformers import PreTrainedTokenizerFast @@ -16,6 +16,7 @@ from vllm.model_executor.guided_decoding.xgrammar_utils import ( convert_lark_to_gbnf, grammar_is_likely_lark) +from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer if TYPE_CHECKING: from transformers import PreTrainedTokenizer @@ -37,11 +38,21 @@ def get_local_xgrammar_guided_decoding_logits_processor( return XGrammarLogitsProcessor(config) -class TokenizerData(NamedTuple): +@dataclass(frozen=True) +class TokenizerData: """Immutable container for cached tokenizer data.""" - encoded_vocab: list[str] - 
stop_token_ids: list[int] | None - backend_str: str + encoded_vocab: list[str] = field(default_factory=list) + stop_token_ids: list[int] | None = None + # These fields are mutually exclusive: `backend_str` is used to create a + # TokenizeInfo with `TokenizerInfo.from_huggingface` while `vocab_type` is + # used within the constructor of TokenizeInfo + backend_str: str | None = None + vocab_type: xgr.VocabType | None = None + + def __post_init__(self): + # Check for mutual exclusive + assert not (self.backend_str and self.vocab_type), \ + "backend_str and vocab_type are mutual exclusive" class TokenizerDataCache: @@ -68,18 +79,27 @@ def get_tokenizer_data(cls, "get_vocab method.") from e stop_token_ids = None - backend_str = xgr.VocabType.RAW + backend_str = "" + vocab_type = xgr.VocabType.RAW + + if stop_token_ids is None and hasattr( + tokenizer, + "eos_token_id") and tokenizer.eos_token_id is not None: + stop_token_ids = [tokenizer.eos_token_id] + if isinstance(tokenizer, PreTrainedTokenizerFast): backend_str = tokenizer.backend_tokenizer.to_str() - if stop_token_ids is None and hasattr( - tokenizer, - "eos_token_id") and tokenizer.eos_token_id is not None: - stop_token_ids = [tokenizer.eos_token_id] + vocab_type = None + + elif isinstance(tokenizer, MistralTokenizer): + # REF: https://github.com/mlc-ai/xgrammar/blob/5e141f6ff1ca02bc31f9e512e68b61f2a8ae88e5/tests/python/test_tokenizer_info.py#L43 # noqa: E501 + vocab_type = xgr.VocabType.BYTE_FALLBACK cls._cache[tokenizer_hash] = TokenizerData( encoded_vocab=encoded_vocab, stop_token_ids=stop_token_ids, - backend_str=backend_str) + backend_str=backend_str, + vocab_type=vocab_type) return cls._cache[tokenizer_hash] @@ -98,11 +118,30 @@ def get_compiler(cls, config: GrammarConfig) -> xgr.GrammarCompiler: cache_key = str(config.tokenizer_hash) if cache_key not in cls._cache: - assert config.encoded_vocab is not None - tokenizer_info = xgr.TokenizerInfo._create_from_handle( - xgr_core.TokenizerInfo.from_huggingface( - config.encoded_vocab, config.backend_str, - config.vocab_size, config.stop_token_ids)) + assert config.tokenizer_data is not None + assert config.tokenizer_data.encoded_vocab is not None + + config_data = config.tokenizer_data + + # In TokenizerDataCache.get_tokenizer_data, a serializable + # tokenizer_data is created and cached. This data is used to build + # a tokenizer_info and create an xgrammar compiler. + # - If tokenizer_data has backend_str set, use + # xgr_core.TokenizerInfo.from_huggingface (a C++ bind). + # - Otherwise, use the default constructor with vocab_type. + # - xgr_core.TokenizerInfo.from_huggingface != + # xgr.TokenizerInfo.from_huggingface. 
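            # The two TokenizerData shapes this branch has to handle
            # (illustrative placeholder values only):
            #
            #   # HF fast tokenizer (PreTrainedTokenizerFast):
            #   TokenizerData(encoded_vocab=[...], stop_token_ids=[2],
            #                 backend_str="<serialized backend_tokenizer JSON>",
            #                 vocab_type=None)
            #
            #   # MistralTokenizer (tokenizer_mode="mistral"):
            #   TokenizerData(encoded_vocab=[...], stop_token_ids=[2],
            #                 backend_str="",
            #                 vocab_type=xgr.VocabType.BYTE_FALLBACK)
            #
            # The first goes through xgr_core.TokenizerInfo.from_huggingface();
            # the second is built with the plain xgr.TokenizerInfo constructor.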
+ if config_data.backend_str: + tokenizer_info = xgr.TokenizerInfo._create_from_handle( + xgr_core.TokenizerInfo.from_huggingface( + config_data.encoded_vocab, config_data.backend_str, + config.vocab_size, config_data.stop_token_ids)) + else: + tokenizer_info = xgr.TokenizerInfo( + config_data.encoded_vocab, + config_data.vocab_type, + vocab_size=config.vocab_size, + stop_token_ids=config_data.stop_token_ids) cls._cache[cache_key] = xgr.GrammarCompiler( tokenizer_info, max_threads=config.max_threads) @@ -118,10 +157,7 @@ class GrammarConfig: grammar_str: str | None = None json_object: bool | None = None max_threads: int = 8 - # Only populated if tokenizer_hash not in cache - encoded_vocab: list[str] | None = None - stop_token_ids: list[int] | None = None - backend_str: str | None = None + tokenizer_data: TokenizerData | None = None @classmethod def from_guided_params(cls, @@ -132,9 +168,6 @@ def from_guided_params(cls, tokenizer_hash = hash(tokenizer) tokenizer_data = TokenizerDataCache.get_tokenizer_data(tokenizer) - encoded_vocab = tokenizer_data.encoded_vocab - stop_token_ids = tokenizer_data.stop_token_ids - backend_str = tokenizer_data.backend_str if guided_params.json: if not isinstance(guided_params.json, str): @@ -152,11 +185,9 @@ def from_guided_params(cls, return cls(json_str=json_str, vocab_size=model_config.hf_text_config.vocab_size, - encoded_vocab=encoded_vocab, - stop_token_ids=stop_token_ids, - backend_str=backend_str, tokenizer_hash=tokenizer_hash, - max_threads=max_threads) + max_threads=max_threads, + tokenizer_data=tokenizer_data) elif guided_params.grammar: # XGrammar only supports GBNF grammars, so we must convert Lark if grammar_is_likely_lark(guided_params.grammar): @@ -181,19 +212,17 @@ def from_guided_params(cls, return cls(grammar_str=grammar_str, vocab_size=model_config.hf_text_config.vocab_size, - encoded_vocab=encoded_vocab, - stop_token_ids=stop_token_ids, - backend_str=backend_str, tokenizer_hash=tokenizer_hash, - max_threads=max_threads) + max_threads=max_threads, + tokenizer_data=tokenizer_data) elif guided_params.json_object: - return cls(json_object=True, - vocab_size=model_config.hf_text_config.vocab_size, - encoded_vocab=encoded_vocab, - stop_token_ids=stop_token_ids, - backend_str=backend_str, - tokenizer_hash=tokenizer_hash, - max_threads=max_threads) + return cls( + json_object=True, + vocab_size=model_config.hf_text_config.vocab_size, + tokenizer_hash=tokenizer_hash, + max_threads=max_threads, + tokenizer_data=tokenizer_data, + ) else: raise ValueError( "Currently only support JSON and EBNF grammar mode for xgrammar" @@ -269,10 +298,14 @@ def __call__(self, input_ids: list[int], # fill_next_token_bitmask so we move it to the device of scores device_type = scores.device.type if device_type != "cuda": - scores = scores.to("cpu") + scores = scores.to("cpu").unsqueeze(0) + + # Note: In this method, if the tensors have different dimensions + # on CPU device fails, but on GPU it runs without error. 
Hence the + # unsqueeze above for scores, to match the token bitmask shape xgr.apply_token_bitmask_inplace(scores, self.token_bitmask.to(scores.device)) if device_type != "cuda": - scores = scores.to(device_type) + scores = scores.to(device_type).squeeze() return scores diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index 54f9f895fe541..e6701f4c4b835 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -132,7 +132,7 @@ def get_tokenizer( if is_from_mistral_org and tokenizer_mode != "mistral": warnings.warn( 'It is strongly recommended to run mistral models with ' - '`--tokenizer_mode "mistral"` to ensure correct ' + '`--tokenizer-mode "mistral"` to ensure correct ' 'encoding and decoding.', FutureWarning, stacklevel=2) diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index 83b3c37d6f04c..17d722e3d88fe 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -314,12 +314,15 @@ def _token_to_id(t: str): if regular_tokens: decoded_list.append( - self.decode(regular_tokens)) # type: ignore + self.tokenizer.decode(regular_tokens)) # type: ignore decoded = ''.join(decoded_list) return decoded + # WARN: Outlines logits processors can overwrite this method. + # See: guided_decoding/outlines_logits_processors.py::_adapt_tokenizer + # for more. def decode(self, ids: Union[List[int], int], skip_special_tokens: bool = True) -> str: From f04e407e6b6b9ce65c16cffda836f05c2ad32682 Mon Sep 17 00:00:00 2001 From: Yan Ma Date: Wed, 18 Dec 2024 14:34:23 +0800 Subject: [PATCH 33/72] [MISC][XPU]update ipex link for CI fix (#11278) --- requirements-xpu.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements-xpu.txt b/requirements-xpu.txt index e41295792283f..42c6c321d040c 100644 --- a/requirements-xpu.txt +++ b/requirements-xpu.txt @@ -9,8 +9,8 @@ setuptools-scm>=8 wheel jinja2 -torch @ https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp310-cp310-linux_x86_64.whl -intel-extension-for-pytorch @ https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp310-cp310-linux_x86_64.whl -oneccl_bind_pt @ https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp310-cp310-linux_x86_64.whl +torch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp310-cp310-linux_x86_64.whl +intel-extension-for-pytorch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp310-cp310-linux_x86_64.whl +oneccl_bind_pt @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp310-cp310-linux_x86_64.whl triton-xpu == 3.0.0b1 From 60508ffda91c22e4cde3b18f149d222211db8886 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Wed, 18 Dec 2024 09:57:16 -0500 Subject: [PATCH 34/72] [Kernel]: Cutlass 2:4 Sparsity + FP8/Int8 Quant Support (#10995) Co-authored-by: Faraz Shahsavan Co-authored-by: ilmarkov Co-authored-by: Rahul Tuli Co-authored-by: rshaw@neuralmagic.com --- CMakeLists.txt | 26 +- .../cutlass_benchmarks/sparse_benchmarks.py | 384 ++++++++++++++ benchmarks/cutlass_benchmarks/utils.py | 96 ++++ .../cutlass_benchmarks/w8a8_benchmarks.py | 28 +- 
.../cutlass_benchmarks/weight_shapes.py | 2 +- csrc/core/math.hpp | 7 + csrc/cutlass_extensions/common.cpp | 11 + csrc/cutlass_extensions/common.hpp | 35 ++ .../epilogue/scaled_mm_epilogues_c3x.hpp | 4 +- csrc/ops.h | 9 + csrc/quantization/cutlass_w8a8/common.hpp | 27 - .../cutlass_w8a8/scaled_mm_c2x.cuh | 3 +- .../cutlass_w8a8/scaled_mm_c3x.cu | 3 +- .../cutlass_w8a8/scaled_mm_entry.cu | 12 +- csrc/sparse/cutlass/sparse_compressor_c3x.cu | 163 ++++++ .../sparse/cutlass/sparse_compressor_entry.cu | 42 ++ csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu | 303 +++++++++++ csrc/sparse/cutlass/sparse_scaled_mm_c3x.cuh | 496 ++++++++++++++++++ csrc/sparse/cutlass/sparse_scaled_mm_entry.cu | 59 +++ csrc/torch_bindings.cpp | 15 + pyproject.toml | 2 +- tests/kernels/test_semi_structured.py | 131 +++++ tests/quantization/test_compressed_tensors.py | 103 +++- tests/weight_loading/models.txt | 2 + .../run_model_weight_loading_test.sh | 4 + tests/weight_loading/test_weight_loading.py | 7 + vllm/_custom_ops.py | 103 ++++ .../compressed_tensors/compressed_tensors.py | 187 ++++++- .../compressed_tensors/schemes/__init__.py | 15 +- .../schemes/compressed_tensors_24.py | 203 +++++++ 30 files changed, 2365 insertions(+), 117 deletions(-) create mode 100644 benchmarks/cutlass_benchmarks/sparse_benchmarks.py create mode 100644 benchmarks/cutlass_benchmarks/utils.py create mode 100644 csrc/core/math.hpp create mode 100644 csrc/cutlass_extensions/common.cpp create mode 100644 csrc/cutlass_extensions/common.hpp delete mode 100644 csrc/quantization/cutlass_w8a8/common.hpp create mode 100644 csrc/sparse/cutlass/sparse_compressor_c3x.cu create mode 100644 csrc/sparse/cutlass/sparse_compressor_entry.cu create mode 100644 csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu create mode 100644 csrc/sparse/cutlass/sparse_scaled_mm_c3x.cuh create mode 100644 csrc/sparse/cutlass/sparse_scaled_mm_entry.cu create mode 100644 tests/kernels/test_semi_structured.py create mode 100644 vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py diff --git a/CMakeLists.txt b/CMakeLists.txt index bf19b3d227171..51b49a18dddf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -206,7 +206,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") SET(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library") # Set CUTLASS_REVISION manually -- its revision detection doesn't work in this case. - set(CUTLASS_REVISION "v3.5.1" CACHE STRING "CUTLASS revision to use") + set(CUTLASS_REVISION "v3.6.0" CACHE STRING "CUTLASS revision to use") # Use the specified CUTLASS source directory for compilation if VLLM_CUTLASS_SRC_DIR is provided if (DEFINED ENV{VLLM_CUTLASS_SRC_DIR}) @@ -223,13 +223,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") FetchContent_Declare( cutlass GIT_REPOSITORY https://github.com/nvidia/cutlass.git - GIT_TAG v3.5.1 + GIT_TAG 8aa95dbb888be6d81c6fbf7169718c5244b53227 GIT_PROGRESS TRUE # Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history. # Important: If GIT_SHALLOW is enabled then GIT_TAG works only with branch names and tags. 
# So if the GIT_TAG above is updated to a commit hash, GIT_SHALLOW must be set to FALSE - GIT_SHALLOW TRUE + GIT_SHALLOW FALSE ) endif() FetchContent_MakeAvailable(cutlass) @@ -241,7 +241,10 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") "csrc/quantization/awq/gemm_kernels.cu" "csrc/custom_all_reduce.cu" "csrc/permute_cols.cu" - "csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu") + "csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu" + "csrc/sparse/cutlass/sparse_scaled_mm_entry.cu" + "csrc/sparse/cutlass/sparse_compressor_entry.cu" + "csrc/cutlass_extensions/common.cpp") set_gencode_flags_for_srcs( SRCS "${VLLM_EXT_SRC}" @@ -271,11 +274,14 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") endif() # - # The cutlass_scaled_mm kernels for Hopper (c3x, i.e. CUTLASS 3.x) require + # The cutlass_scaled_mm cutlass_scaled_sparse_mm, and cutlass_compressor kernels + # For Hopper (c3x, i.e. CUTLASS 3.x) require # CUDA 12.0 or later (and only work on Hopper, 9.0/9.0a for now). cuda_archs_loose_intersection(SCALED_MM_3X_ARCHS "9.0;9.0a" "${CUDA_ARCHS}") if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_3X_ARCHS) - set(SRCS "csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu") + set(SRCS "csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu" + "csrc/sparse/cutlass/sparse_compressor_c3x.cu" + "csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu") set_gencode_flags_for_srcs( SRCS "${SRCS}" CUDA_ARCHS "${SCALED_MM_3X_ARCHS}") @@ -284,12 +290,12 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") message(STATUS "Building scaled_mm_c3x for archs: ${SCALED_MM_3X_ARCHS}") else() if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_3X_ARCHS) - message(STATUS "Not building scaled_mm_c3x as CUDA Compiler version is " + message(STATUS "Not building cutlass_c3x kernels as CUDA Compiler version is " "not >= 12.0, we recommend upgrading to CUDA 12.0 or " - "later if you intend on running FP8 quantized models on " + "later if you intend on running FP8 sparse or quantized models on " "Hopper.") else() - message(STATUS "Not building scaled_mm_c3x as no compatible archs found " + message(STATUS "Not building cutlass_c3x as no compatible archs found " "in CUDA target architectures") endif() @@ -404,7 +410,7 @@ define_gpu_extension_target( SOURCES ${VLLM_EXT_SRC} COMPILE_FLAGS ${VLLM_GPU_FLAGS} ARCHITECTURES ${VLLM_GPU_ARCHES} - INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR} + INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR};${CUTLASS_TOOLS_UTIL_INCLUDE_DIR} USE_SABI 3 WITH_SOABI) diff --git a/benchmarks/cutlass_benchmarks/sparse_benchmarks.py b/benchmarks/cutlass_benchmarks/sparse_benchmarks.py new file mode 100644 index 0000000000000..3d1c5e392f9e2 --- /dev/null +++ b/benchmarks/cutlass_benchmarks/sparse_benchmarks.py @@ -0,0 +1,384 @@ +import argparse +import copy +import itertools +import pickle as pkl +import time +from typing import Callable, Iterable, List, Tuple + +import torch +import torch.utils.benchmark as TBenchmark +from torch.utils.benchmark import Measurement as TMeasurement +from utils import make_rand_sparse_tensors +from weight_shapes import WEIGHT_SHAPES + +from vllm import _custom_ops as ops +from vllm.utils import FlexibleArgumentParser + +DEFAULT_MODELS = list(WEIGHT_SHAPES.keys()) +DEFAULT_BATCH_SIZES = [1, 16, 32, 64, 128, 256, 512] +DEFAULT_TP_SIZES = [1] + + +# bench +def bench_fn(label: str, sub_label: str, description: str, fn: Callable, *args, + **kwargs) -> TMeasurement: + min_run_time = 1 + + globals = { + "args": args, + "kwargs": kwargs, + "fn": fn, + } + return TBenchmark.Timer( + stmt="fn(*args, **kwargs)", + 
globals=globals, + label=label, + sub_label=sub_label, + description=description, + ).blocked_autorange(min_run_time=min_run_time) + + +def bench_int8(dtype: torch.dtype, m: int, k: int, n: int, label: str, + sub_label: str) -> Iterable[TMeasurement]: + assert dtype == torch.int8 + b_compressed, e, a, b = make_rand_sparse_tensors(torch.int8, m, n, k) + scale_a = torch.tensor(1.0, device="cuda", dtype=torch.float32) + scale_b = torch.tensor(1.0, device="cuda", dtype=torch.float32) + bias = torch.zeros((n, ), device="cuda", dtype=torch.bfloat16) + + out = ops.cutlass_scaled_sparse_mm(a, b_compressed, e, scale_a, scale_b, + torch.bfloat16) + out_ref = ops.cutlass_scaled_mm(a, b, scale_a, scale_b, torch.bfloat16) + + if not torch.allclose(out, out_ref): + print("Incorrect results") + print(out) + print(out_ref) + else: + print("Correct results") + + timers = [] + # pytorch impl - bfloat16 + timers.append( + bench_fn(label, sub_label, "pytorch_bf16_bf16_bf16_matmul-no-scales", + torch.mm, a.to(dtype=torch.bfloat16), + b.to(dtype=torch.bfloat16))) + + # pytorch impl - float16 + timers.append( + bench_fn(label, sub_label, + "pytorch_fp16_fp16_fp16_matmul-no-scales", torch.mm, + a.to(dtype=torch.float16), b.to(dtype=torch.float16))) + + # cutlass impl + timers.append( + bench_fn(label, sub_label, "cutlass_i8_i8_bf16_scaled_mm", + ops.cutlass_scaled_mm, a, b, scale_a, scale_b, + torch.bfloat16)) + + # cutlass with bias + timers.append( + bench_fn(label, sub_label, "cutlass_i8_i8_bf16_scaled_mm_bias", + ops.cutlass_scaled_mm, a, b, scale_a, scale_b, torch.bfloat16, + bias)) + + # cutlass sparse impl + timers.append( + bench_fn(label, sub_label, "cutlass_i8_i8_bf16_scaled_sparse_mm", + ops.cutlass_scaled_sparse_mm, a, b_compressed, e, scale_a, + scale_b, torch.bfloat16)) + + # cutlass sparse with bias + timers.append( + bench_fn(label, sub_label, "cutlass_i8_i8_bf16_scaled_sparse_mm_bias", + ops.cutlass_scaled_sparse_mm, a, b_compressed, e, scale_a, + scale_b, torch.bfloat16, bias)) + + return timers + + +def bench_fp8(dtype: torch.dtype, m: int, k: int, n: int, label: str, + sub_label: str) -> Iterable[TMeasurement]: + assert dtype == torch.float8_e4m3fn + b_compressed, e, a, b = make_rand_sparse_tensors(torch.float8_e4m3fn, m, n, + k) + scale_a = torch.tensor(1.0, device="cuda", dtype=torch.float32) + scale_b = torch.tensor(1.0, device="cuda", dtype=torch.float32) + bias = torch.zeros((n, ), device="cuda", dtype=torch.bfloat16) + + out = ops.cutlass_scaled_sparse_mm(a, b_compressed, e, scale_a, scale_b, + torch.bfloat16) + out_ref = ops.cutlass_scaled_mm(a, b, scale_a, scale_b, torch.bfloat16) + + if not torch.allclose(out, out_ref): + print("Incorrect results") + print(out) + print(out_ref) + else: + print("Correct results") + + timers = [] + + # pytorch impl w. 
bf16 + timers.append( + bench_fn(label, sub_label, "pytorch_bf16_bf16_bf16_matmul-no-scales", + torch.mm, a.to(dtype=torch.bfloat16, device="cuda"), + b.to(dtype=torch.bfloat16, device="cuda"))) + + # pytorch impl: bf16 output, without fp8 fast accum + timers.append( + bench_fn(label, + sub_label, + "pytorch_fp8_fp8_bf16_scaled_mm", + torch._scaled_mm, + a, + b, + scale_a=scale_a, + scale_b=scale_b, + out_dtype=torch.bfloat16)) + + # pytorch impl: bf16 output, with fp8 fast accum + timers.append( + bench_fn(label, + sub_label, + "pytorch_fp8_fp8_bf16_scaled_mm_fast_accum", + torch._scaled_mm, + a, + b, + scale_a=scale_a, + scale_b=scale_b, + out_dtype=torch.bfloat16, + use_fast_accum=True)) + + # pytorch impl: fp16 output, without fp8 fast accum + timers.append( + bench_fn(label, + sub_label, + "pytorch_fp8_fp8_fp16_scaled_mm", + torch._scaled_mm, + a, + b, + scale_a=scale_a, + scale_b=scale_b, + out_dtype=torch.float16)) + + # pytorch impl: fp16 output, with fp8 fast accum + timers.append( + bench_fn(label, + sub_label, + "pytorch_fp8_fp8_fp16_scaled_mm_fast_accum", + torch._scaled_mm, + a, + b, + scale_a=scale_a, + scale_b=scale_b, + out_dtype=torch.float16, + use_fast_accum=True)) + + # cutlass impl: bf16 output + timers.append( + bench_fn(label, sub_label, "cutlass_fp8_fp8_bf16_scaled_mm", + ops.cutlass_scaled_mm, a, b, scale_a, scale_b, + torch.bfloat16)) + + # cutlass impl: bf16 output + timers.append( + bench_fn(label, sub_label, "cutlass_fp8_fp8_bf16_scaled_sparse_mm", + ops.cutlass_scaled_sparse_mm, a, b_compressed, e, scale_a, + scale_b, torch.bfloat16)) + + # cutlass impl: fp16 output + timers.append( + bench_fn(label, sub_label, "cutlass_fp8_fp8_fp16_scaled_sparse_mm", + ops.cutlass_scaled_sparse_mm, a, b_compressed, e, scale_a, + scale_b, torch.float16)) + + # cutlass impl: bf16 output, with bias + timers.append( + bench_fn(label, sub_label, + "cutlass_fp8_fp8_bf16_scaled_sparse_mm_bias", + ops.cutlass_scaled_sparse_mm, a, b_compressed, e, scale_a, + scale_b, torch.bfloat16, bias)) + + # cutlass impl: fp16 output, with bias + timers.append( + bench_fn(label, sub_label, + "cutlass_fp8_fp8_fp16_scaled_sparse_mm_bias", + ops.cutlass_scaled_sparse_mm, a, b_compressed, e, scale_a, + scale_b, torch.float16, bias.to(dtype=torch.float16))) + + return timers + + +def bench(dtype: torch.dtype, m: int, k: int, n: int, label: str, + sub_label: str) -> Iterable[TMeasurement]: + if dtype == torch.int8: + return bench_int8(dtype, m, k, n, label, sub_label) + if dtype == torch.float8_e4m3fn: + return bench_fp8(dtype, m, k, n, label, sub_label) + raise ValueError("unsupported type") + + +# runner +def print_timers(timers: Iterable[TMeasurement]): + compare = TBenchmark.Compare(timers) + compare.print() + + +def run(dtype: torch.dtype, + MKNs: Iterable[Tuple[int, int, int]]) -> Iterable[TMeasurement]: + results = [] + for m, k, n in MKNs: + timers = bench(dtype, m, k, n, f"scaled-{dtype}-gemm", + f"MKN=({m}x{k}x{n})") + print_timers(timers) + results.extend(timers) + + return results + + +# output makers +def make_output(data: Iterable[TMeasurement], + MKNs: Iterable[Tuple[int, int, int]], + base_description: str, + timestamp=None): + print(f"== All Results {base_description} ====") + print_timers(data) + + # pickle all the results + timestamp = int(time.time()) if timestamp is None else timestamp + with open(f"{base_description}-{timestamp}.pkl", "wb") as f: + pkl.dump(data, f) + + +# argparse runners + + +def run_square_bench(args): + dim_sizes = list( + range(args.dim_start, args.dim_end + 
1, args.dim_increment)) + MKNs = list(zip(dim_sizes, dim_sizes, dim_sizes)) + data = run(args.dtype, MKNs) + + make_output(data, MKNs, f"square_bench-{args.dtype}") + + +def run_range_bench(args): + dim_sizes = list(range(args.dim_start, args.dim_end, args.dim_increment)) + n = len(dim_sizes) + Ms = [args.m_constant] * n if args.m_constant is not None else dim_sizes + Ks = [args.k_constant] * n if args.k_constant is not None else dim_sizes + Ns = [args.n_constant] * n if args.n_constant is not None else dim_sizes + MKNs = list(zip(Ms, Ks, Ns)) + data = run(args.dtype, MKNs) + + make_output(data, MKNs, f"range_bench-{args.dtype}") + + +def run_model_bench(args): + print("Benchmarking models:") + for i, model in enumerate(args.models): + print(f"[{i}] {model}") + + def model_shapes(model_name: str, tp_size: int) -> List[Tuple[int, int]]: + KNs = [] + for KN, tp_split_dim in copy.deepcopy(WEIGHT_SHAPES[model_name]): + KN[tp_split_dim] = KN[tp_split_dim] // tp_size + KNs.append(KN) + return KNs + + model_bench_data = [] + models_tps = list(itertools.product(args.models, args.tp_sizes)) + for model, tp_size in models_tps: + Ms = args.batch_sizes + KNs = model_shapes(model, tp_size) + MKNs = [] + for m in Ms: + for k, n in KNs: + MKNs.append((m, k, n)) + + data = run(args.dtype, MKNs) + model_bench_data.append(data) + + # Print all results + for data, model_tp in zip(model_bench_data, models_tps): + model, tp_size = model_tp + print(f"== Results {args.dtype} {model}-TP{tp_size} ====") + print_timers(data) + + timestamp = int(time.time()) + + all_data = [] + for d in model_bench_data: + all_data.extend(d) + # pickle all data + with open(f"model_bench-{args.dtype}-{timestamp}.pkl", "wb") as f: + pkl.dump(all_data, f) + + +if __name__ == '__main__': + + def to_torch_dtype(dt): + if dt == "int8": + return torch.int8 + if dt == "fp8": + return torch.float8_e4m3fn + raise ValueError("unsupported dtype") + + parser = FlexibleArgumentParser( + description=""" +Benchmark Cutlass GEMM. + + To run square GEMMs: + python3 ./benchmarks/cutlass_benchmarks/sparse_benchmarks.py --dtype fp8 square_bench --dim-start 128 --dim-end 512 --dim-increment 64 + + To run constant N and K and sweep M: + python3 ./benchmarks/cutlass_benchmarks/sparse_benchmarks.py --dtype fp8 range_bench --dim-start 128 --dim-end 512 --dim-increment 64 --n-constant 16384 --k-constant 16384 + + To run dimensions from a model: + python3 ./benchmarks/cutlass_benchmarks/sparse_benchmarks.py --dtype fp8 model_bench --models meta-llama/Llama-2-7b-hf --batch-sizes 16 --tp-sizes 1 + + Output: + - a .pkl file, that is a list of raw torch.benchmark.utils.Measurements for the pytorch and cutlass implementations for the various GEMMs. 
+ """, # noqa: E501 + formatter_class=argparse.RawTextHelpFormatter) + + parser.add_argument("--dtype", + type=to_torch_dtype, + required=True, + help="Available options are ['int8', 'fp8']") + subparsers = parser.add_subparsers(dest="cmd") + + square_parser = subparsers.add_parser("square_bench") + square_parser.add_argument("--dim-start", type=int, required=True) + square_parser.add_argument("--dim-end", type=int, required=True) + square_parser.add_argument("--dim-increment", type=int, required=True) + square_parser.set_defaults(func=run_square_bench) + + range_parser = subparsers.add_parser("range_bench") + range_parser.add_argument("--dim-start", type=int, required=True) + range_parser.add_argument("--dim-end", type=int, required=True) + range_parser.add_argument("--dim-increment", type=int, required=True) + range_parser.add_argument("--m-constant", type=int, default=None) + range_parser.add_argument("--n-constant", type=int, default=None) + range_parser.add_argument("--k-constant", type=int, default=None) + range_parser.set_defaults(func=run_range_bench) + + model_parser = subparsers.add_parser("model_bench") + model_parser.add_argument("--models", + nargs="+", + type=str, + default=DEFAULT_MODELS, + choices=WEIGHT_SHAPES.keys()) + model_parser.add_argument("--tp-sizes", + nargs="+", + type=int, + default=DEFAULT_TP_SIZES) + model_parser.add_argument("--batch-sizes", + nargs="+", + type=int, + default=DEFAULT_BATCH_SIZES) + model_parser.set_defaults(func=run_model_bench) + + args = parser.parse_args() + args.func(args) diff --git a/benchmarks/cutlass_benchmarks/utils.py b/benchmarks/cutlass_benchmarks/utils.py new file mode 100644 index 0000000000000..ef06fcd6604dd --- /dev/null +++ b/benchmarks/cutlass_benchmarks/utils.py @@ -0,0 +1,96 @@ +# Cutlass bench utils +from typing import Iterable, Tuple + +import torch + +import vllm._custom_ops as ops + + +def to_fp8(tensor: torch.Tensor) -> torch.Tensor: + finfo = torch.finfo(torch.float8_e4m3fn) + return torch.round(tensor.clamp( + min=finfo.min, max=finfo.max)).to(dtype=torch.float8_e4m3fn) + + +def to_int8(tensor: torch.Tensor) -> torch.Tensor: + return torch.round(tensor.clamp(min=-128, max=127)).to(dtype=torch.int8) + + +def to_bf16(tensor: torch.Tensor) -> torch.Tensor: + return tensor.to(dtype=torch.bfloat16) + + +def to_fp16(tensor: torch.Tensor) -> torch.Tensor: + return tensor.to(dtype=torch.float16) + + +def make_rand_tensors(dtype: torch.dtype, m: int, n: int, + k: int) -> Tuple[torch.Tensor, torch.Tensor]: + a = torch.randn((m, k), device='cuda') * 5 + b = torch.randn((n, k), device='cuda').t() * 5 + + if dtype == torch.int8: + return to_int8(a), to_int8(b) + if dtype == torch.float8_e4m3fn: + return to_fp8(a), to_fp8(b) + + raise ValueError("unsupported dtype") + + +def prune_to_2_4(tensor): + # Reshape tensor to [N, 4] where N is number of groups of 4 + original_shape = tensor.shape + reshaped = tensor.reshape(-1, 4) + + # Get indices of top 2 absolute values in each group of 4 + _, indices = torch.topk(torch.abs(reshaped), k=2, dim=1) + + # Create binary mask + mask = torch.zeros_like(reshaped) + mask.scatter_(dim=1, + index=indices, + src=torch.ones_like(indices, dtype=mask.dtype)) + + # Apply mask and reshape back + pruned = reshaped * mask + + # Turn all -0.0 to 0.0 + pruned[pruned == -0.0] = 0.0 + + return pruned.reshape(original_shape) + + +def make_rand_sparse_tensors(dtype: torch.dtype, m: int, n: int, + k: int) -> Tuple[torch.Tensor, torch.Tensor]: + a = torch.randn((m, k), device='cuda') * 5 + b = torch.randn((n, 
k), device='cuda').t() * 5 + + b = prune_to_2_4(b.t()).t() + + if dtype == torch.int8: + a, b = to_int8(a), to_int8(b) + elif dtype == torch.float8_e4m3fn: + a, b = to_fp8(a), to_fp8(b) + elif dtype == torch.float16: + a, b = to_fp16(a), to_fp16(b) + elif dtype == torch.bfloat16: + a, b = to_bf16(a), to_bf16(b) + else: + raise ValueError("unsupported dtype") + + b_compressed, e = ops.cutlass_sparse_compress(b.t()) + + # Compressed B, Metadata, Original A, B + return b_compressed, e, a, b + + +def make_n_rand_sparse_tensors(num_tensors: int, dtype: torch.dtype, + m: int, n: int, k: int) -> \ + Tuple[Iterable[torch.Tensor], Iterable[torch.Tensor]]: + ABs = [] + for _ in range(num_tensors): + b_comp, e, a, b = make_rand_sparse_tensors(dtype, m, n, k) + if b_comp is not None: + ABs.append(make_rand_sparse_tensors(dtype, m, n, k)) + BComps, Es, As, Bs = zip(*ABs) + return list(BComps), list(Es), list(As), list(Bs) diff --git a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py index 63cf5d50cac75..d0353bc8cb42a 100644 --- a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py +++ b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py @@ -8,6 +8,7 @@ import torch import torch.utils.benchmark as TBenchmark from torch.utils.benchmark import Measurement as TMeasurement +from utils import make_rand_tensors from weight_shapes import WEIGHT_SHAPES from vllm import _custom_ops as ops @@ -17,31 +18,6 @@ DEFAULT_BATCH_SIZES = [1, 16, 32, 64, 128, 256, 512] DEFAULT_TP_SIZES = [1] -# helpers - - -def to_fp8(tensor: torch.Tensor) -> torch.Tensor: - finfo = torch.finfo(torch.float8_e4m3fn) - return torch.round(tensor.clamp( - min=finfo.min, max=finfo.max)).to(dtype=torch.float8_e4m3fn) - - -def to_int8(tensor: torch.Tensor) -> torch.Tensor: - return torch.round(tensor.clamp(min=-128, max=127)).to(dtype=torch.int8) - - -def make_rand_tensors(dtype: torch.dtype, m: int, n: int, - k: int) -> Tuple[torch.Tensor, torch.Tensor]: - a = torch.randn((m, k), device='cuda') * 5 - b = torch.randn((n, k), device='cuda').t() * 5 - - if dtype == torch.int8: - return to_int8(a), to_int8(b) - if dtype == torch.float8_e4m3fn: - return to_fp8(a), to_fp8(b) - - raise ValueError("unsupported dtype") - # bench def bench_fn(label: str, sub_label: str, description: str, fn: Callable, *args, @@ -386,4 +362,4 @@ def to_torch_dtype(dt): model_parser.set_defaults(func=run_model_bench) args = parser.parse_args() - args.func(args) + args.func(args) \ No newline at end of file diff --git a/benchmarks/cutlass_benchmarks/weight_shapes.py b/benchmarks/cutlass_benchmarks/weight_shapes.py index 25ec9d6028627..d58fb0bf86374 100644 --- a/benchmarks/cutlass_benchmarks/weight_shapes.py +++ b/benchmarks/cutlass_benchmarks/weight_shapes.py @@ -40,4 +40,4 @@ ([8192, 57344], 1), ([28672, 8192], 0), ], -} +} \ No newline at end of file diff --git a/csrc/core/math.hpp b/csrc/core/math.hpp new file mode 100644 index 0000000000000..ba9f40a230c8e --- /dev/null +++ b/csrc/core/math.hpp @@ -0,0 +1,7 @@ +#include +#include + +inline uint32_t next_pow_2(uint32_t const num) { + if (num <= 1) return num; + return 1 << (CHAR_BIT * sizeof(num) - __builtin_clz(num - 1)); +} \ No newline at end of file diff --git a/csrc/cutlass_extensions/common.cpp b/csrc/cutlass_extensions/common.cpp new file mode 100644 index 0000000000000..3d2093ab94297 --- /dev/null +++ b/csrc/cutlass_extensions/common.cpp @@ -0,0 +1,11 @@ +#include "cutlass_extensions/common.hpp" + +int32_t get_sm_version_num() { + int32_t major_capability, 
minor_capability; + cudaDeviceGetAttribute(&major_capability, cudaDevAttrComputeCapabilityMajor, + 0); + cudaDeviceGetAttribute(&minor_capability, cudaDevAttrComputeCapabilityMinor, + 0); + int32_t version_num = major_capability * 10 + minor_capability; + return version_num; +} \ No newline at end of file diff --git a/csrc/cutlass_extensions/common.hpp b/csrc/cutlass_extensions/common.hpp new file mode 100644 index 0000000000000..85e359aa57113 --- /dev/null +++ b/csrc/cutlass_extensions/common.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include "cutlass/cutlass.h" +#include +#include "cuda_runtime.h" +#include + +/** + * Helper function for checking CUTLASS errors + */ +#define CUTLASS_CHECK(status) \ + { \ + cutlass::Status error = status; \ + TORCH_CHECK(error == cutlass::Status::kSuccess, \ + cutlassGetStatusString(error)); \ + } + +/** + * Panic wrapper for unwinding CUDA runtime errors + */ +#define CUDA_CHECK(status) \ + { \ + cudaError_t error = status; \ + TORCH_CHECK(error == cudaSuccess, cudaGetErrorString(error)); \ + } + +inline int get_cuda_max_shared_memory_per_block_opt_in(int const device) { + int max_shared_mem_per_block_opt_in = 0; + cudaDeviceGetAttribute(&max_shared_mem_per_block_opt_in, + cudaDevAttrMaxSharedMemoryPerBlockOptin, + device); + return max_shared_mem_per_block_opt_in; +} + +int32_t get_sm_version_num(); diff --git a/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp b/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp index 95764ecddc79f..fcc17c7727f94 100644 --- a/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp +++ b/csrc/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp @@ -36,13 +36,13 @@ struct ScaledEpilogueBase { // Don't want to support nullptr by default template using ColLoad = cutlass::epilogue::fusion::Sm90ColBroadcast< - 0 /*Stages*/, typename EpilogueDescriptor::TileShape, T, + 0 /*Stages*/, typename EpilogueDescriptor::TileShape, T, T, Stride, Int<0>, Int<0>>, 128 / sizeof_bits_v, EnableNullPtr>; // Don't want to support nullptr by default template using RowLoad = cutlass::epilogue::fusion::Sm90RowBroadcast< - 0 /*Stages*/, typename EpilogueDescriptor::TileShape, T, + 0 /*Stages*/, typename EpilogueDescriptor::TileShape, T, T, Stride, Int<1>, Int<0>>, 128 / sizeof_bits_v, EnableNullPtr>; // This utility function constructs the arguments for the load descriptors diff --git a/csrc/ops.h b/csrc/ops.h index 816b471d062d2..c145e4eda0845 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -162,6 +162,15 @@ void cutlass_scaled_mm_azp(torch::Tensor& out, torch::Tensor const& a, torch::Tensor const& azp_adj, c10::optional const& azp, c10::optional const& bias); + +void cutlass_scaled_sparse_mm(torch::Tensor& out, torch::Tensor const& a, + torch::Tensor const& b, torch::Tensor const& e, + torch::Tensor const& a_scales, + torch::Tensor const& b_scales, + c10::optional const& bias); + +bool cutlass_sparse_compress_entry(torch::Tensor& a_compressed, + torch::Tensor& e, torch::Tensor const& a); #endif void static_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input, diff --git a/csrc/quantization/cutlass_w8a8/common.hpp b/csrc/quantization/cutlass_w8a8/common.hpp deleted file mode 100644 index bf04bb400790f..0000000000000 --- a/csrc/quantization/cutlass_w8a8/common.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include "cutlass/cutlass.h" -#include - -/** - * Helper function for checking CUTLASS errors - */ -#define CUTLASS_CHECK(status) \ - { \ - TORCH_CHECK(status == cutlass::Status::kSuccess, \ - 
cutlassGetStatusString(status)) \ - } - -inline uint32_t next_pow_2(uint32_t const num) { - if (num <= 1) return num; - return 1 << (CHAR_BIT * sizeof(num) - __builtin_clz(num - 1)); -} - -inline int get_cuda_max_shared_memory_per_block_opt_in(int const device) { - int max_shared_mem_per_block_opt_in = 0; - cudaDeviceGetAttribute(&max_shared_mem_per_block_opt_in, - cudaDevAttrMaxSharedMemoryPerBlockOptin, - device); - return max_shared_mem_per_block_opt_in; -} - diff --git a/csrc/quantization/cutlass_w8a8/scaled_mm_c2x.cuh b/csrc/quantization/cutlass_w8a8/scaled_mm_c2x.cuh index d03242f44ab1d..75681f7f37820 100644 --- a/csrc/quantization/cutlass_w8a8/scaled_mm_c2x.cuh +++ b/csrc/quantization/cutlass_w8a8/scaled_mm_c2x.cuh @@ -21,7 +21,8 @@ #include "cutlass/epilogue/threadblock/fusion/visitors.hpp" #include "cutlass/gemm/kernel/default_gemm_universal_with_visitor.h" -#include "common.hpp" +#include "core/math.hpp" +#include "cutlass_extensions/common.hpp" // clang-format on using namespace cute; diff --git a/csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu b/csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu index 33581a63d4c3d..8190277997161 100644 --- a/csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu +++ b/csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu @@ -24,7 +24,8 @@ #include "cutlass/gemm/collective/collective_builder.hpp" #include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp" -#include "common.hpp" +#include "core/math.hpp" +#include "cutlass_extensions/common.hpp" // clang-format on using namespace cute; diff --git a/csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu b/csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu index 97a969cf5e3e0..4f7b6588ef3f7 100644 --- a/csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu +++ b/csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu @@ -3,6 +3,8 @@ #include #include +#include "cutlass_extensions/common.hpp" + void cutlass_scaled_mm_sm75(torch::Tensor& c, torch::Tensor const& a, torch::Tensor const& b, torch::Tensor const& a_scales, @@ -79,16 +81,6 @@ bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability) { return false; } -int32_t get_sm_version_num() { - int32_t major_capability, minor_capability; - cudaDeviceGetAttribute(&major_capability, cudaDevAttrComputeCapabilityMajor, - 0); - cudaDeviceGetAttribute(&minor_capability, cudaDevAttrComputeCapabilityMinor, - 0); - int32_t version_num = major_capability * 10 + minor_capability; - return version_num; -} - void cutlass_scaled_mm(torch::Tensor& c, torch::Tensor const& a, torch::Tensor const& b, torch::Tensor const& a_scales, torch::Tensor const& b_scales, diff --git a/csrc/sparse/cutlass/sparse_compressor_c3x.cu b/csrc/sparse/cutlass/sparse_compressor_c3x.cu new file mode 100644 index 0000000000000..218c5317b4de6 --- /dev/null +++ b/csrc/sparse/cutlass/sparse_compressor_c3x.cu @@ -0,0 +1,163 @@ +// clang-format will break include orders +// clang-format off +#include + +#include "sparse_scaled_mm_c3x.cuh" + +#include "cutlass/numeric_conversion.h" +#include "cutlass/transform/device/transform_universal_adapter.hpp" +#include "cutlass/transform/kernel/sparse_gemm_compressor.hpp" +#include "cutlass/epilogue/collective/default_epilogue.hpp" + +#include "cutlass/util/host_tensor.h" +#include "cutlass/util/packed_stride.hpp" +// clang-format on + +using namespace cute; +using namespace vllm; + +/// Make A structured sparse by replacing elements with 0 and compress it +template +bool cutlass_sparse_compress(torch::Tensor& a_nzs, torch::Tensor& a_meta, + torch::Tensor const& a) { + 
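+  // Doc comment (summarizing the checks below and in sparse_compressor_entry.cu):
+  // this routine splits the dense row-major matrix `a` into the operands the
+  // CUTLASS SM90 2:4 structured-sparse GEMM expects. `a_nzs` receives the kept
+  // non-zero values (m x k/2) and `a_meta` the packed 2-bit selection indices,
+  // four selectors per metadata byte (k/8 columns). The GEMM kernel types
+  // declared below are never launched; they exist only to derive the
+  // SparseConfig used to instantiate CUTLASS's StructuredSparseCompressor.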
// Checks for conformality + TORCH_CHECK(a.dtype() == torch::kInt8 || a.dtype() == torch::kFloat8_e4m3fn || + a.dtype() == torch::kFloat16 || a.dtype() == torch::kBFloat16); + TORCH_CHECK(a.dim() == 2) + // Check for strides and alignment + TORCH_CHECK(a.stride(0) % 4 == 0) // Required for semi-structured sparsity + TORCH_CHECK(a.stride(1) == 1) + + int m = a.size(0); + int k = a.size(1); + + // Sparse kernel setup; this kernel is not used for matmul, + // but just for setting up the compressor utility + // A matrix configuration + using ElementA = ElementA_; + using LayoutTagA = cutlass::layout::RowMajor; + constexpr int AlignmentA = 128 / cutlass::sizeof_bits::value; + // B matrix configuration + using ElementB = ElementA; + using LayoutTagB = cutlass::layout::ColumnMajor; + constexpr int AlignmentB = 128 / cutlass::sizeof_bits::value; + // C/D matrix configuration + using ElementC = float; + using LayoutTagC = cutlass::layout::ColumnMajor; + constexpr int AlignmentC = 128 / cutlass::sizeof_bits::value; + // Core kernel configurations + using ElementAccumulator = ElementAcc_; + using TileShape = Shape<_128, _128, _128>; + using TileShapeRef = Shape<_128, _128, _64>; + using ClusterShape = Shape<_1, _2, _1>; + using KernelSchedule = typename std::conditional< + std::is_same_v, + cutlass::gemm::KernelTmaWarpSpecializedFP8FastAccum, + cutlass::gemm::KernelTmaWarpSpecialized>::type; + + using EpilogueSchedule = cutlass::epilogue::TmaWarpSpecialized; + using ProblemShape = Shape; + + using CollectiveEpilogue = + typename cutlass::epilogue::collective::CollectiveBuilder< + cutlass::arch::Sm90, cutlass::arch::OpClassTensorOp, TileShape, + ClusterShape, cutlass::epilogue::collective::EpilogueTileAuto, + ElementAccumulator, ElementAccumulator, ElementC, LayoutTagC, + AlignmentC, ElementC, LayoutTagC, AlignmentC, + EpilogueSchedule>::CollectiveOp; + + using CollectiveMainloop = + typename cutlass::gemm::collective::CollectiveBuilder< + cutlass::arch::Sm90, cutlass::arch::OpClassSparseTensorOp, ElementA, + LayoutTagA, AlignmentA, ElementB, LayoutTagB, AlignmentB, + ElementAccumulator, TileShape, ClusterShape, + cutlass::gemm::collective::StageCountAutoCarveout( + sizeof(typename CollectiveEpilogue::SharedStorage))>, + KernelSchedule>::CollectiveOp; + + using GemmKernel = + cutlass::gemm::kernel::GemmUniversal; + + using Gemm = cutlass::gemm::device::GemmUniversalAdapter; + + using StrideA = cutlass::gemm::TagToStrideA_t; + using StrideE = StrideA; + + using StrideA = Stride, int64_t>; + + // The n (=1) dimension does not matter for the compressor + typename GemmKernel::ProblemShape prob_shape{m, 1, k, 1}; + + using LayoutA = typename GemmKernel::CollectiveMainloop::LayoutA; + using LayoutE = typename GemmKernel::CollectiveMainloop::LayoutE; + + using ElementE = typename GemmKernel::CollectiveMainloop::ElementE; + using SparseConfig = typename GemmKernel::CollectiveMainloop::SparseConfig; + + // Offline compressor kernel + using CompressorUtility = + cutlass::transform::kernel::StructuredSparseCompressorUtility< + ProblemShape, ElementA, LayoutTagA, SparseConfig>; + + using CompressorKernel = + cutlass::transform::kernel::StructuredSparseCompressor< + ProblemShape, ElementA, LayoutTagA, SparseConfig, + cutlass::arch::Sm90>; + + using Compressor = + cutlass::transform::device::TransformUniversalAdapter; + + auto [M, N, K, L] = prob_shape; + + StrideA stride_A; + stride_A = + cutlass::make_cute_packed_stride(StrideA{}, cute::make_shape(M, K, L)); + + CompressorUtility compressor_utility(prob_shape, 
stride_A); + + int ME = compressor_utility.get_metadata_m_physical(); + int KE = compressor_utility.get_metadata_k_physical(); + int KC = compressor_utility.get_tensorA_k_physical(); + + auto a_ptr = static_cast(a.data_ptr()); + + auto a_nzs_ptr = static_cast(a_nzs.data_ptr()); + auto a_meta_ptr = static_cast( + a_meta.data_ptr()); + + cutlass::KernelHardwareInfo hw_info; + hw_info.device_id = 0; + hw_info.sm_count = + cutlass::KernelHardwareInfo::query_device_multiprocessor_count( + hw_info.device_id); + typename Compressor::Arguments arguments{ + prob_shape, {a_ptr, stride_A, a_nzs_ptr, a_meta_ptr}, {hw_info}}; + + Compressor compressor_op; + size_t workspace_size = Compressor::get_workspace_size(arguments); + cutlass::device_memory::allocation workspace(workspace_size); + + CUTLASS_CHECK(compressor_op.can_implement(arguments)); + CUTLASS_CHECK(compressor_op.initialize(arguments, workspace.get())); + CUTLASS_CHECK(compressor_op.run()); + CUDA_CHECK(cudaDeviceSynchronize()); + + return true; +} + +bool cutlass_sparse_compress_sm90(torch::Tensor& a_nzs, torch::Tensor& a_meta, + torch::Tensor const& a) { + if (a.dtype() == torch::kBFloat16) { + return cutlass_sparse_compress(a_nzs, a_meta, + a); + } else if (a.dtype() == torch::kFloat16) { + return cutlass_sparse_compress(a_nzs, a_meta, a); + } else if (a.dtype() == torch::kFloat8_e4m3fn) { + return cutlass_sparse_compress(a_nzs, a_meta, + a); + } else if (a.dtype() == torch::kInt8) { + return cutlass_sparse_compress(a_nzs, a_meta, a); + } + return false; +} \ No newline at end of file diff --git a/csrc/sparse/cutlass/sparse_compressor_entry.cu b/csrc/sparse/cutlass/sparse_compressor_entry.cu new file mode 100644 index 0000000000000..d23d937b6ac28 --- /dev/null +++ b/csrc/sparse/cutlass/sparse_compressor_entry.cu @@ -0,0 +1,42 @@ +#include + +#include +#include + +#include "cutlass_extensions/common.hpp" + +#if defined ENABLE_SCALED_MM_C3X && ENABLE_SCALED_MM_C3X +bool cutlass_sparse_compress_sm90(torch::Tensor& a_nzs, torch::Tensor& a_meta, + torch::Tensor const& a); +#endif + +bool cutlass_sparse_compress_entry(torch::Tensor& a_nzs, torch::Tensor& a_meta, + torch::Tensor const& a) { + // Checks for conformality + TORCH_CHECK(a.dim() == 2 && a_meta.dim() == 2 && a_nzs.dim() == 2); + TORCH_CHECK(a.size(0) == a_nzs.size(0) && a.size(0) == a_meta.size(0) && + a_nzs.size(1) * 2 == a.size(1) && + a_meta.size(1) * 2 * 4 == a.size(1)); + // Considering elemsPerMetaElem = 8b / 2b_per_nz = 4 + + // Check for strides and alignment + TORCH_CHECK(a.stride(1) == 1 && a_nzs.stride(1) == 1 && + a_meta.stride(1) == 1); // Row-major + TORCH_CHECK(a.stride(0) % 8 == 0); // 8 Byte Alignment for Compression + + at::cuda::OptionalCUDAGuard const device_guard(device_of(a)); + int32_t version_num = get_sm_version_num(); + + // Guard against compilation issues for sm90 kernels +#if defined ENABLE_SCALED_MM_C3X && ENABLE_SCALED_MM_C3X + if (version_num >= 90) { + return cutlass_sparse_compress_sm90(a_nzs, a_meta, a); + } +#endif + + TORCH_CHECK_NOT_IMPLEMENTED( + false, + "No compiled cutlass_scaled_sparse_mm for a compute capability less than " + "CUDA device capability: ", + version_num); +} diff --git a/csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu b/csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu new file mode 100644 index 0000000000000..b50e9a3a2c240 --- /dev/null +++ b/csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu @@ -0,0 +1,303 @@ +// clang-format will break include orders +// clang-format off +#include + +#if defined CUDA_VERSION && CUDA_VERSION >= 12000 +#include 
"sparse_scaled_mm_c3x.cuh" +// clang-format on + +using namespace cute; +using namespace vllm; + +template typename Epilogue, + typename... EpilogueArgs> +void cutlass_gemm_sm90_fp8_dispatch(torch::Tensor& out, torch::Tensor const& a, + torch::Tensor const& bt_nzs, + torch::Tensor const& bt_meta, + EpilogueArgs&&... args) { + static_assert(std::is_same()); + TORCH_CHECK(a.dtype() == torch::kFloat8_e4m3fn); + TORCH_CHECK(bt_meta.dtype() == torch::kUInt8); + TORCH_CHECK(bt_nzs.dtype() == torch::kFloat8_e4m3fn); + + using Cutlass3xGemmDefault = + typename sm90_config_default::Cutlass3xGemm; + using Cutlass3xGemmM64 = + typename sm90_fp8_config_M64::Cutlass3xGemm; + using Cutlass3xGemmM128 = + typename sm90_fp8_config_M128::Cutlass3xGemm; + using Cutlass3xGemmM256 = + typename sm90_fp8_config_M256::Cutlass3xGemm; + using Cutlass3xGemmM512 = + typename sm90_fp8_config_M512::Cutlass3xGemm; + + using Cutlass3xGemm1 = + typename sm90_fp8_config_1::Cutlass3xGemm; + using Cutlass3xGemm2 = + typename sm90_fp8_config_2::Cutlass3xGemm; + using Cutlass3xGemm3 = + typename sm90_fp8_config_3::Cutlass3xGemm; + using Cutlass3xGemm4 = + typename sm90_fp8_config_4::Cutlass3xGemm; + using Cutlass3xGemm5 = + typename sm90_fp8_config_5::Cutlass3xGemm; + using Cutlass3xGemm6 = + typename sm90_fp8_config_6::Cutlass3xGemm; + using Cutlass3xGemm7 = + typename sm90_fp8_config_7::Cutlass3xGemm; + using Cutlass3xGemm8 = + typename sm90_fp8_config_8::Cutlass3xGemm; + + uint32_t const n = bt_nzs.size(0); + uint32_t const m = a.size(0); // Batch size + uint32_t const mp2 = + std::max(static_cast(64), next_pow_2(m)); // next power of 2 + + if (mp2 <= 64) { + if (n == 28672) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else if (n == 4096 || n == 6144) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } + } else if (mp2 <= 128) { + if (n == 4096) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else if (n == 28672) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else if (n == 6144) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } + } else if (mp2 <= 256) { + if (n == 4096) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else if (n == 28672) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else if (n == 6144) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } + } else { + if (n == 6144 || n == 28672) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else if (n == 4096) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } + } + + // Otherwise the default heuristic + if (mp2 <= 64) { + // n in [1, 64] + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else if (mp2 <= 128) { + // n in (64, 128] + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else if (mp2 <= 256) { + // n in (128, 256] + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else { + // n in (256, inf) + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } +} + +template typename Epilogue, + typename... 
EpilogueArgs> +void cutlass_gemm_sm90_fp16_dispatch(torch::Tensor& out, torch::Tensor const& a, + torch::Tensor const& bt_nzs, + torch::Tensor const& bt_meta, + EpilogueArgs&&... args) { + static_assert(std::is_same()); + TORCH_CHECK(a.dtype() == torch::kFloat16); + TORCH_CHECK(bt_meta.dtype() == torch::kUInt8); + TORCH_CHECK(bt_nzs.dtype() == torch::kFloat16); + + using Cutlass3xGemmDefault = + typename sm90_config_default::Cutlass3xGemm; + + // m in (128, inf) + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); +} + +template typename Epilogue, + typename... EpilogueArgs> +void cutlass_gemm_sm90_bf16_dispatch(torch::Tensor& out, torch::Tensor const& a, + torch::Tensor const& bt_nzs, + torch::Tensor const& bt_meta, + EpilogueArgs&&... args) { + static_assert(std::is_same()); + TORCH_CHECK(a.dtype() == torch::kBFloat16); + TORCH_CHECK(bt_meta.dtype() == torch::kUInt8); + TORCH_CHECK(bt_nzs.dtype() == torch::kBFloat16); + + using Cutlass3xGemmDefault = + typename sm90_config_default::Cutlass3xGemm; + + // m in (128, inf) + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); +} + +template typename Epilogue, + typename... EpilogueArgs> +void cutlass_gemm_sm90_int8_dispatch(torch::Tensor& out, torch::Tensor const& a, + torch::Tensor const& bt_nzs, + torch::Tensor const& bt_meta, + EpilogueArgs&&... args) { + static_assert(std::is_same()); + TORCH_CHECK(a.dtype() == torch::kInt8); + TORCH_CHECK(bt_meta.dtype() == torch::kUInt8); + TORCH_CHECK(bt_nzs.dtype() == torch::kInt8); + + using Cutlass3xGemmDefault = + typename sm90_config_default::Cutlass3xGemm; + using Cutlass3xGemmM128 = + typename sm90_int8_config_M128::Cutlass3xGemm; + using Cutlass3xGemmM64 = + typename sm90_int8_config_M64::Cutlass3xGemm; + using Cutlass3xGemmM32NBig = + typename sm90_int8_config_M32_NBig::Cutlass3xGemm; + using Cutlass3xGemmM32NSmall = + typename sm90_int8_config_M32_NSmall::Cutlass3xGemm; + + uint32_t const n = out.size(1); + bool const is_small_n = n < 8192; + + uint32_t const m = a.size(0); + uint32_t const mp2 = + std::max(static_cast(32), next_pow_2(m)); // next power of 2 + + if (mp2 <= 32) { + // m in [1, 32] + if (is_small_n) { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else { + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } + } else if (mp2 <= 64) { + // m in (32, 64] + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else if (mp2 <= 128) { + // m in (64, 128] + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } else { + // m in (128, inf) + return cutlass_sparse_gemm_caller( + out, a, bt_nzs, bt_meta, std::forward(args)...); + } +} + +template