From aabd3d72c49799cbf8614af74cd66ef344f9c9c0 Mon Sep 17 00:00:00 2001
From: Derrick Mwiti
Date: Wed, 7 Jun 2023 12:23:27 +0300
Subject: [PATCH 1/8] AWS text classification benchmark

---
 .../benchmark_deepsparse.py                   | 67 +++++++++++++++++++
 .../benchmark_huggingface.py                  | 49 ++++++++++++++
 examples/aws-text-benchmarks/readme.md        | 47 +++++++++++++
 3 files changed, 163 insertions(+)
 create mode 100644 examples/aws-text-benchmarks/benchmark_deepsparse.py
 create mode 100644 examples/aws-text-benchmarks/benchmark_huggingface.py
 create mode 100644 examples/aws-text-benchmarks/readme.md

diff --git a/examples/aws-text-benchmarks/benchmark_deepsparse.py b/examples/aws-text-benchmarks/benchmark_deepsparse.py
new file mode 100644
index 0000000000..9d7d7fdcbb
--- /dev/null
+++ b/examples/aws-text-benchmarks/benchmark_deepsparse.py
@@ -0,0 +1,67 @@
+from deepsparse import Pipeline, Context
+import deepsparse.transformers
+from datasets import load_dataset
+from transformers import AutoTokenizer
+from tqdm import tqdm
+import numpy as np
+import time, os
+
+os.environ["NM_BIND_THREADS_TO_CORES"] = "1"
+INPUT_COL = "text"
+dataset = load_dataset("ag_news", split="train[:3000]")
+batch_size = 64
+buckets = [32, 64]
+model_path = "./sparse-model/deployment/"
+
+### TOKENIZE DATASET - (used to compute buckets)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+
+def pre_process_fn(examples):
+    return tokenizer(examples[INPUT_COL], add_special_tokens=True, return_tensors="np", padding=False, truncation=False)
+
+
+dataset = dataset.map(pre_process_fn, batched=True)
+dataset = dataset.add_column("num_tokens", list(map(len, dataset["input_ids"])))
+dataset = dataset.sort("num_tokens")
+max_token_len = dataset[-1]["num_tokens"]
+
+### SPLIT DATA INTO BATCHES
+num_pad_items = batch_size - (dataset.num_rows % batch_size)
+inputs = ([""] * num_pad_items) + dataset[INPUT_COL]
+batches = []
+
+for b_index_start in range(0, len(inputs), batch_size):
+    batches.append(inputs[b_index_start:b_index_start + batch_size])
+
+## RUN THROUGHPUT TESTING
+print("\nCompiling models:")
+buckets = [32, 64]
+
+tc_pipeline = Pipeline.create(
+    task="zero_shot_text_classification",
+    model_path=model_path,
+    model_scheme="mnli",
+    sequence_length=buckets,
+    batch_size=batch_size,
+    context=Context(num_streams=1)
+)
+print("\nRunning test:")
+# run inferences on the dataset
+start = time.perf_counter()
+
+predictions = []
+for batch in tqdm(batches):
+    predictions.append(tc_pipeline(sequences=batch, labels=['Sports', 'Business', 'Sci/Tech']))
+
+# flatten and remove padded predictions
+predictions = [pred for sublist in predictions for pred in sublist.labels]
+predictions = predictions[num_pad_items:]
+end = time.perf_counter()
+
+# compute throughput
+total_time_executing = end - start
+print(f"Total time: {total_time_executing}")
+items_per_sec = len(predictions) / total_time_executing
+
+print(f"Items Per Second: {items_per_sec}")
\ No newline at end of file
diff --git a/examples/aws-text-benchmarks/benchmark_huggingface.py b/examples/aws-text-benchmarks/benchmark_huggingface.py
new file mode 100644
index 0000000000..c97ada5222
--- /dev/null
+++ b/examples/aws-text-benchmarks/benchmark_huggingface.py
@@ -0,0 +1,49 @@
+from datasets import load_dataset
+from transformers import pipeline, AutoTokenizer
+from transformers.pipelines.pt_utils import KeyDataset
+from tqdm import tqdm
+import time
+import torch
+model_path = "./dense-model/training/"
+batch_size = 64
+
+### SETUP DATASETS - in this case, we download ag_news
+print("Setting up the dataset:")
+
+INPUT_COL = "text"
+dataset = load_dataset("ag_news", split="train[:3000]")
+
+### TOKENIZE DATASETS - to sort dataset
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+def pre_process_fn(examples):
+    return tokenizer(examples[INPUT_COL], add_special_tokens=True, return_tensors="np", padding=False, truncation=False)
+
+dataset = dataset.map(pre_process_fn, batched=True)
+dataset = dataset.add_column("num_tokens", list(map(len, dataset["input_ids"])))
+dataset = dataset.sort("num_tokens")
+
+### SPLIT DATA INTO BATCHES
+hf_dataset = KeyDataset(dataset, INPUT_COL)
+
+### RUN THROUGHPUT TESTING
+# load model
+hf_pipeline = pipeline("zero-shot-classification", model_path, batch_size=batch_size,device=("cuda:0" if torch.cuda.is_available() else "cpu"), )
+
+# run inferences
+start = time.perf_counter()
+
+predictions = []
+for prediction in hf_pipeline(hf_dataset,candidate_labels=['Sports', 'Business', 'Sci/Tech']):
+    predictions.append(prediction)
+
+# torch.cuda.synchronize()
+
+end = time.perf_counter()
+
+# compute throughput
+total_time_executing = end - start
+items_per_sec = len(predictions) / total_time_executing
+
+print(f"Total time: {total_time_executing}")
+print(f"Items Per Second: {items_per_sec}")
diff --git a/examples/aws-text-benchmarks/readme.md b/examples/aws-text-benchmarks/readme.md
new file mode 100644
index 0000000000..b6ddc4a483
--- /dev/null
+++ b/examples/aws-text-benchmarks/readme.md
@@ -0,0 +1,47 @@
+This repo contains example benchmarking scripts for computing throughput of DeepSparse with a sparse model and throughput of HuggingFace + PyTorch on a GPU with a dense model.
+
+In this example, we run on the `ag_news` dataset with models downloaded from SparseZoo.
+
+## Sparse Model DeepSparse
+
+Install DeepSparse:
+
+```bash
+pip install deepsparse
+```
+
+Download Sparse Model:
+
+```bash
+sparsezoo.download zoo:nlp/text_classification/bert-large/pytorch/huggingface/mnli/pruned90_quant-none --save-dir ./sparse-model
+```
+
+Run DeepSparse Benchmark (creates buckets for token len `32,64` and `max_token_len`):
+
+```bash
+python benchmark_deepsparse.py
+```
+
+Note: DeepSparse uses static input shapes. Since the distribution of inputs for a dataset will be varied (multiple different sequence lengths),
+we can use bucketing where we compile DeepSparse with multiple input shapes and dynamically route inputs.
+
+As such, we used buckets of length 32, 64, and max_tokens. DeepSparse runs best with sequence lengths that are multiples of 16.
+
+## Dense Model GPU
+
+Install `transformers`, `datasets`, and `sparsezoo`:
+```
+pip install transformers[torch]
+pip install datasets
+pip install sparsezoo
+```
+
+Download Dense Model:
+```bash
+sparsezoo.download zoo:nlp/text_classification/bert-large/pytorch/huggingface/mnli/base-none --save-dir ./dense-model
+```
+
+Run HF Benchmark (on GPU):
+```
+python benchmark_huggingface.py
+```
From b35ec9400de0a72bda269ed5ac27d4853c68208f Mon Sep 17 00:00:00 2001
From: Derrick Mwiti
Date: Wed, 7 Jun 2023 17:59:05 +0300
Subject: [PATCH 2/8] add histogram

---
 examples/aws-text-benchmarks/image.png | Bin 0 -> 26090 bytes
 examples/aws-text-benchmarks/readme.md |   3 ++-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 examples/aws-text-benchmarks/image.png

diff --git a/examples/aws-text-benchmarks/image.png b/examples/aws-text-benchmarks/image.png
new file mode 100644
index 0000000000000000000000000000000000000000..9992593d50153d5647384ee9f09303a6e5b94528
GIT binary patch
literal 26090
[26090 bytes of base85-encoded PNG data omitted; image.png is the histogram of ag_news token lengths referenced in readme.md]
literal 0
HcmV?d00001

diff --git a/examples/aws-text-benchmarks/readme.md b/examples/aws-text-benchmarks/readme.md
index b6ddc4a483..ae0e8ad703 100644
--- a/examples/aws-text-benchmarks/readme.md
+++ b/examples/aws-text-benchmarks/readme.md
@@ -24,7 +24,8 @@ python benchmark_deepsparse.py
 
 Note: DeepSparse uses static input shapes. Since the distribution of inputs for a dataset will be varied (multiple different sequence lengths),
 we can use bucketing where we compile DeepSparse with multiple input shapes and dynamically route inputs.
-
+In the case of `ag_news` (the dataset used in this example), the distribution of token lengths looks like the following:
+![Histogram](image.png)
 As such, we used buckets of length 32, 64, and max_tokens. DeepSparse runs best with sequence lengths that are multiples of 16. 
 ## Dense Model GPU
From 257d7cf970b472bf7f1161b1d637c538b9c59c16 Mon Sep 17 00:00:00 2001
From: Derrick Mwiti
Date: Wed, 7 Jun 2023 19:10:47 +0300
Subject: [PATCH 3/8] update bucket size

---
 examples/aws-text-benchmarks/benchmark_deepsparse.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/aws-text-benchmarks/benchmark_deepsparse.py b/examples/aws-text-benchmarks/benchmark_deepsparse.py
index 9d7d7fdcbb..93e8121307 100644
--- a/examples/aws-text-benchmarks/benchmark_deepsparse.py
+++ b/examples/aws-text-benchmarks/benchmark_deepsparse.py
@@ -10,7 +10,7 @@
 INPUT_COL = "text"
 dataset = load_dataset("ag_news", split="train[:3000]")
 batch_size = 64
-buckets = [32, 64]
+buckets = [64, 128, 256]
 model_path = "./sparse-model/deployment/"
 
 ### TOKENIZE DATASET - (used to compute buckets)
@@ -36,7 +36,6 @@ def pre_process_fn(examples):
 
 ## RUN THROUGHPUT TESTING
 print("\nCompiling models:")
-buckets = [32, 64]
 
 tc_pipeline = Pipeline.create(
     task="zero_shot_text_classification",
From 20ede17fce58e23301cc022c1a1331585d60835a Mon Sep 17 00:00:00 2001
From: Derrick Mwiti
Date: Wed, 7 Jun 2023 19:12:24 +0300
Subject: [PATCH 4/8] update bucket size

---
 examples/aws-text-benchmarks/readme.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/aws-text-benchmarks/readme.md b/examples/aws-text-benchmarks/readme.md
index ae0e8ad703..021d1c8d1f 100644
--- a/examples/aws-text-benchmarks/readme.md
+++ b/examples/aws-text-benchmarks/readme.md
@@ -16,7 +16,7 @@ Download Sparse Model:
 sparsezoo.download zoo:nlp/text_classification/bert-large/pytorch/huggingface/mnli/pruned90_quant-none --save-dir ./sparse-model
 ```
 
-Run DeepSparse Benchmark (creates buckets for token len `32,64` and `max_token_len`):
+Run DeepSparse Benchmark (creates buckets for token len 64, 128, and 256):
 
 ```bash
 python benchmark_deepsparse.py
@@ -26,7 +26,7 @@ Note: DeepSparse uses static input shapes. Since the distribution of inputs for
 we can use bucketing where we compile DeepSparse with multiple input shapes and dynamically route inputs.
 In the case of `ag_news` (the dataset used in this example), the distribution of token lengths looks like the following:
 ![Histogram](image.png)
-As such, we used buckets of length 32, 64, and max_tokens. DeepSparse runs best with sequence lengths that are multiples of 16.
+As such, we used buckets of length 64, 128, and 256. DeepSparse runs best with sequence lengths that are multiples of 16.
From 286a2042d3c0d11f714dc9e42e1b77bea220ff37 Mon Sep 17 00:00:00 2001
From: Derrick Mwiti
Date: Wed, 7 Jun 2023 19:13:07 +0300
Subject: [PATCH 5/8] update bucket size

---
 examples/aws-text-benchmarks/readme.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/aws-text-benchmarks/readme.md b/examples/aws-text-benchmarks/readme.md
index 021d1c8d1f..085f33cd5d 100644
--- a/examples/aws-text-benchmarks/readme.md
+++ b/examples/aws-text-benchmarks/readme.md
@@ -26,6 +26,7 @@ Note: DeepSparse uses static input shapes. Since the distribution of inputs for
 we can use bucketing where we compile DeepSparse with multiple input shapes and dynamically route inputs.
 In the case of `ag_news` (the dataset used in this example), the distribution of token lengths looks like the following:
 ![Histogram](image.png)
+
 As such, we used buckets of length 64, 128, and 256. DeepSparse runs best with sequence lengths that are multiples of 16. 
## Dense Model GPU From 0c56f503fc090d08b948f569e1b4ea5a732bef43 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 19 Jul 2023 17:29:06 -0400 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Benjamin Fineran --- examples/aws-text-benchmarks/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/aws-text-benchmarks/readme.md b/examples/aws-text-benchmarks/readme.md index 085f33cd5d..e5efdcf03e 100644 --- a/examples/aws-text-benchmarks/readme.md +++ b/examples/aws-text-benchmarks/readme.md @@ -7,7 +7,7 @@ In this example, we run on the `ag_news` dataset with models downloaded from Spa Install DeepSparse: ```bash -pip install deepsparse +pip install deepsparse[transformers] ``` Download Sparse Model: From 9c58c50666fb71c445fdbb03fa315cf093f8a241 Mon Sep 17 00:00:00 2001 From: mgoin Date: Wed, 17 Jan 2024 21:32:23 +0000 Subject: [PATCH 7/8] Style --- .../benchmark_deepsparse.py | 46 ++++++++++++++---- .../benchmark_huggingface.py | 47 ++++++++++++++++--- examples/aws-text-benchmarks/readme.md | 16 +++++++ 3 files changed, 92 insertions(+), 17 deletions(-) diff --git a/examples/aws-text-benchmarks/benchmark_deepsparse.py b/examples/aws-text-benchmarks/benchmark_deepsparse.py index 93e8121307..0aace4b41e 100644 --- a/examples/aws-text-benchmarks/benchmark_deepsparse.py +++ b/examples/aws-text-benchmarks/benchmark_deepsparse.py @@ -1,10 +1,28 @@ -from deepsparse import Pipeline, Context +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import time
+
+import numpy as np
+from tqdm import tqdm
+from transformers import AutoTokenizer
+
 import deepsparse.transformers
 from datasets import load_dataset
-from transformers import AutoTokenizer
-from tqdm import tqdm
-import numpy as np
-import time, os
+from deepsparse import Context, Pipeline
+
 
 os.environ["NM_BIND_THREADS_TO_CORES"] = "1"
 INPUT_COL = "text"
@@ -18,7 +36,13 @@
 
 
 def pre_process_fn(examples):
-    return tokenizer(examples[INPUT_COL], add_special_tokens=True, return_tensors="np", padding=False, truncation=False)
+    return tokenizer(
+        examples[INPUT_COL],
+        add_special_tokens=True,
+        return_tensors="np",
+        padding=False,
+        truncation=False,
+    )
 
 
 dataset = dataset.map(pre_process_fn, batched=True)
@@ -32,7 +56,7 @@
 batches = []
 
 for b_index_start in range(0, len(inputs), batch_size):
-    batches.append(inputs[b_index_start:b_index_start + batch_size])
+    batches.append(inputs[b_index_start : b_index_start + batch_size])
 
 ## RUN THROUGHPUT TESTING
 print("\nCompiling models:")
@@ -43,7 +67,7 @@
     model_scheme="mnli",
     sequence_length=buckets,
     batch_size=batch_size,
-    context=Context(num_streams=1)
+    context=Context(num_streams=1),
 )
 print("\nRunning test:")
 # run inferences on the dataset
 start = time.perf_counter()
 
 predictions = []
 for batch in tqdm(batches):
-    predictions.append(tc_pipeline(sequences=batch, labels=['Sports', 'Business', 'Sci/Tech']))
+    predictions.append(
+        tc_pipeline(sequences=batch, labels=["Sports", "Business", "Sci/Tech"])
+    )
 
 # flatten and remove padded predictions
 predictions = [pred for sublist in predictions for pred in sublist.labels]
@@ -63,4 +89,4 @@
 # compute throughput
 total_time_executing = end - start
 print(f"Total time: {total_time_executing}")
 items_per_sec = len(predictions) / total_time_executing
 
-print(f"Items Per Second: {items_per_sec}")
\ No newline at end of file
+print(f"Items Per Second: {items_per_sec}")
diff --git a/examples/aws-text-benchmarks/benchmark_huggingface.py b/examples/aws-text-benchmarks/benchmark_huggingface.py
index c97ada5222..261718a5b7 100644
--- a/examples/aws-text-benchmarks/benchmark_huggingface.py
+++ b/examples/aws-text-benchmarks/benchmark_huggingface.py
@@ -1,9 +1,27 @@
-from datasets import load_dataset
-from transformers import pipeline, AutoTokenizer
-from transformers.pipelines.pt_utils import KeyDataset
-from tqdm import tqdm
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import time
+
+from tqdm import tqdm
+from transformers import AutoTokenizer, pipeline
+from transformers.pipelines.pt_utils import KeyDataset
+
 import torch
+from datasets import load_dataset
+
+
 model_path = "./dense-model/training/"
 batch_size = 64
 
@@ -16,8 +34,16 @@
 ### TOKENIZE DATASETS - to sort dataset
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 
+
 def pre_process_fn(examples):
-    return tokenizer(examples[INPUT_COL], add_special_tokens=True, return_tensors="np", padding=False, truncation=False)
+    return tokenizer(
+        examples[INPUT_COL],
+        add_special_tokens=True,
+        return_tensors="np",
+        padding=False,
+        truncation=False,
+    )
+
 
 dataset = dataset.map(pre_process_fn, batched=True)
 dataset = dataset.add_column("num_tokens", list(map(len, dataset["input_ids"])))
@@ -28,13 +54,20 @@ def pre_process_fn(examples):
 
 ### RUN THROUGHPUT TESTING
 # load model
-hf_pipeline = pipeline("zero-shot-classification", model_path, batch_size=batch_size,device=("cuda:0" if torch.cuda.is_available() else "cpu"), )
+hf_pipeline = pipeline(
+    "zero-shot-classification",
+    model_path,
+    batch_size=batch_size,
+    device=("cuda:0" if torch.cuda.is_available() else "cpu"),
+)
 
 # run inferences
 start = time.perf_counter()
 
 predictions = []
-for prediction in hf_pipeline(hf_dataset,candidate_labels=['Sports', 'Business', 'Sci/Tech']):
+for prediction in hf_pipeline(
+    hf_dataset, candidate_labels=["Sports", "Business", "Sci/Tech"]
+):
     predictions.append(prediction)
 
 # torch.cuda.synchronize()
diff --git a/examples/aws-text-benchmarks/readme.md b/examples/aws-text-benchmarks/readme.md
index e5efdcf03e..b394014e00 100644
--- a/examples/aws-text-benchmarks/readme.md
+++ b/examples/aws-text-benchmarks/readme.md
@@ -1,3 +1,19 @@
+<!--
+Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
From 96faf36c7fb09cdfbb583bc6de39a5f0712956c9 Mon Sep 17 00:00:00 2001 From: mgoin Date: Wed, 17 Jan 2024 22:03:38 +0000 Subject: [PATCH 8/8] Style --- examples/aws-text-benchmarks/benchmark_deepsparse.py | 8 +++----- examples/aws-text-benchmarks/benchmark_huggingface.py | 9 ++++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/examples/aws-text-benchmarks/benchmark_deepsparse.py b/examples/aws-text-benchmarks/benchmark_deepsparse.py index 0aace4b41e..42e8335f30 100644 --- a/examples/aws-text-benchmarks/benchmark_deepsparse.py +++ b/examples/aws-text-benchmarks/benchmark_deepsparse.py @@ -15,11 +15,9 @@ import os import time -import numpy as np from tqdm import tqdm from transformers import AutoTokenizer -import deepsparse.transformers from datasets import load_dataset from deepsparse import Context, Pipeline @@ -31,7 +29,7 @@ buckets = [64, 128, 256] model_path = "./sparse-model/deployment/" -### TOKENIZE DATASET - (used to comptue buckets) +# TOKENIZE DATASET - (used to comptue buckets) tokenizer = AutoTokenizer.from_pretrained(model_path) @@ -50,7 +48,7 @@ def pre_process_fn(examples): dataset = dataset.sort("num_tokens") max_token_len = dataset[-1]["num_tokens"] -### SPLIT DATA INTO BATCHES +# SPLIT DATA INTO BATCHES num_pad_items = batch_size - (dataset.num_rows % batch_size) inputs = ([""] * num_pad_items) + dataset[INPUT_COL] batches = [] @@ -58,7 +56,7 @@ def pre_process_fn(examples): for b_index_start in range(0, len(inputs), batch_size): batches.append(inputs[b_index_start : b_index_start + batch_size]) -## RUN THROUPUT TESTING +# RUN THROUPUT TESTING print("\nCompiling models:") tc_pipeline = Pipeline.create( diff --git a/examples/aws-text-benchmarks/benchmark_huggingface.py b/examples/aws-text-benchmarks/benchmark_huggingface.py index 261718a5b7..3ccc485856 100644 --- a/examples/aws-text-benchmarks/benchmark_huggingface.py +++ b/examples/aws-text-benchmarks/benchmark_huggingface.py @@ -14,7 +14,6 @@ import time -from tqdm import tqdm from transformers import AutoTokenizer, pipeline from transformers.pipelines.pt_utils import KeyDataset @@ -25,13 +24,13 @@ model_path = "./dense-model/training/" batch_size = 64 -### SETUP DATASETS - in this case, we download ag_news +# SETUP DATASETS - in this case, we download ag_news print("Setting up the dataset:") INPUT_COL = "text" dataset = load_dataset("ag_news", split="train[:3000]") -### TOKENIZE DATASETS - to sort dataset +# TOKENIZE DATASETS - to sort dataset tokenizer = AutoTokenizer.from_pretrained(model_path) @@ -49,10 +48,10 @@ def pre_process_fn(examples): dataset = dataset.add_column("num_tokens", list(map(len, dataset["input_ids"]))) dataset = dataset.sort("num_tokens") -### SPLIT DATA INTO BATCHES +# SPLIT DATA INTO BATCHES hf_dataset = KeyDataset(dataset, INPUT_COL) -### RUN THROUGPUT TESTING +# RUN THROUGPUT TESTING # load model hf_pipeline = pipeline( "zero-shot-classification",