From 03050782ee8a7d8658e24aba3e9b2e289628d717 Mon Sep 17 00:00:00 2001
From: Himanshu Maurya
Date: Fri, 22 Sep 2023 11:31:11 +0100
Subject: [PATCH] updated summary description of post

---
 content/posts/lm_sharding.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/content/posts/lm_sharding.md b/content/posts/lm_sharding.md
index 0049d7c..1ffb271 100644
--- a/content/posts/lm_sharding.md
+++ b/content/posts/lm_sharding.md
@@ -5,7 +5,7 @@ draft: false
 ShowToc: true
 category: [ai]
 tags: ["llms", "ai", "inference"]
-description: "A guide to fine-tuning GPT-X models with DeepSpeed"
+description: "Techniques to load LLMs on smaller GPUs and enable parallel inference using Hugging Face Accelerate"
 ---
 
 *With the rise of deep learning and the development of increasingly powerful models, pre-trained language models have grown in size. While these models deliver impressive performance in various natural language processing (NLP) tasks, their sheer magnitude poses challenges for inference on resource-constrained devices and large-scale distributed systems. Enter sharding, a technique that divides large models into smaller, more manageable parts, offering an efficient and faster approach to distributed inference.*