diff --git a/.taskfiles/hugo/taskfile.yaml b/.taskfiles/hugo/taskfile.yaml index df32542..1d7063f 100644 --- a/.taskfiles/hugo/taskfile.yaml +++ b/.taskfiles/hugo/taskfile.yaml @@ -33,7 +33,7 @@ tasks: sed -i '/draft: true/c\draft: false' {{ .path }} - | sed -i '/date: .*/c\date: {{ now.Format "2006-01-02" }}' {{ .path }} - - echo "Branch is ready for PR" + - echo "Commit this change, then branch is ready for PR!" # - task: _pull_request requires: vars: ["path"] diff --git a/archetypes/blog.md b/archetypes/blog.md index 769bcda..b054c36 100644 --- a/archetypes/blog.md +++ b/archetypes/blog.md @@ -1,6 +1,10 @@ --- title: {{ replace .Name "-" " " | title }} date: {{ .Date }} +authors: + - name: ahgraber + link: https://github.com/ahgraber + image: https://github.com/ahgraber.png tags: # meta - 'meta' @@ -19,5 +23,8 @@ tags: - 'copyright' - 'privacy' series: [] +layout: single +toc: true +math: false draft: true --- diff --git a/config/_default/params.yaml b/config/_default/params.yaml index 51e9c01..1584f08 100644 --- a/config/_default/params.yaml +++ b/config/_default/params.yaml @@ -41,7 +41,7 @@ navbar: width: 50 height: 50 footer: - width: full # *width + width: *width displayCopyright: true displayPoweredBy: true diff --git a/content/blog/hello-world/index.md b/content/blog/hello-world/index.md index 7cb9780..1ba6428 100644 --- a/content/blog/hello-world/index.md +++ b/content/blog/hello-world/index.md @@ -1,10 +1,17 @@ --- title: Hello World! date: 2024-04-22 +authors: + - name: ahgraber + link: https://github.com/ahgraber + image: https://github.com/ahgraber.png tags: - 'meta' - 'blogumentation' - 'homelab' +layout: single +toc: true +math: false draft: false --- diff --git a/content/blog/llama3-cohort.md/architectures.csv b/content/blog/llama3-cohort.md/architectures.csv new file mode 100644 index 0000000..e72d4a9 --- /dev/null +++ b/content/blog/llama3-cohort.md/architectures.csv @@ -0,0 +1,21 @@ +,Meta,,,Google,Cohere,Databricks,Mistral,Meta,,Microsoft,,,Snowflake,DeepSeek +Release Date,18-Jul-23,,,21-Feb-24,11-Mar-24,27-Mar-24,17-Apr-24,18-Apr-24,,22-Apr-24,,,24-Apr-24,7-May-24 +Name,llama-2-7B,llama-2-13B,llama-2-70B,Gemma 7B,Command-R,DBRX,8x22B,llama-3-8B,llama-3-70B,Phi 3 mini,Phi 3 small,Phi 3 medium,Arctic,v2 +Training Tokens,2T,2T,2T,6T,_?_,12T,_?_,15T,80T,3.3T,4.8T,4.8T,3.5T,8.1T +Tokenizer Vocabulary,32k,32k,32k,256k,256k,100k,32k,128k,128k,32k,100k,32k (?),32k,100k +Context Length (training),4k,4k,4k,8k,8k,32k,4k,8k,8k,4k,4k,,4k,4k +Hidden dimension,4096,5120,8192,3072,8192,6144,6144,4096,8192,3072,4096,5120,7168,5120 +FF dimension,11008,13824,28672,24576,,10752,16384,14336,28672,8192,_?_,_?_,4864,1536 +Positional Encoding,RoPE,RoPE,RoPE,RoPE,RoPE?,RoPE,RoPE,RoPE,RoPE,RoPE / LongRoPE,RoPE?,RoPE?,RoPE,RoPE +Normalization,RMSNorm,RMSNorm,RMSNorm,RMSNorm,_?_,Layer,RMSNorm,RMSNorm,RMSNorm,RMSNorm,_?_,_?_,RMSNorm,RMSNorm +Activation Function,SwiGLU,SwiGLU,SwiGLU,GeGLU,SiLU,GLU,SiLU,SwiGLU,SwiGLU,SiLU,_?_,_?_,SwiGLU,SwiGLU +Attention,_?_,_?_,GQA,MQA,_?_,GQA,"SWA, GQA",GQA,GQA,SWA,GQA; BlockSparse,_?_,Attention-sinks SWA (TBD),MLA +Heads,32,40,64,16,64,48,48,32,64,32,32,40,56,128 +Layers,32,40,80,28,40,40,56,32,80,32,32,40,35,60 +Alignment,"SFT, PPO","SFT, PPO","SFT, Rejection Sampling, PPO","SFT, RLHF",_?_,"SFT, _RLHF (implied)_","? 
SFT, DPO","SFT, Rejection Sampling, PPO, DPO","SFT, Rejection Sampling, PPO, DPO","SFT, DPO",_?_,_?_,SFT,"SFT, GRPO" +MoE,no,no,no,no,no,yes,yes,no,no,no,no,no,hybrid,yes +Experts,,,,,,16,8,,,,,,128,160+2 +Top-k,,,,,,4,2,,,,,,2,6 +Total Params,,,,,,132B,141B,,,,,,480B,236B +**Parameters (active)**,**7B**,**13B**,**70B**,**7B**,**35B**,**36B**,**39B**,**8B**,**70B**,**3.8B**,**7B**,**14B**,**17B**,**21B** +Context Length (inference),4k,4k,4k,8k,128k,32k,64k,8k,8k,4k; 128k,8k,_?_,4k; 32k with SWA,128k diff --git a/content/blog/llama3-cohort.md/benchmarks.csv b/content/blog/llama3-cohort.md/benchmarks.csv new file mode 100644 index 0000000..b0fde3e --- /dev/null +++ b/content/blog/llama3-cohort.md/benchmarks.csv @@ -0,0 +1,21 @@ +,Release Date,Name,MMLU (language),modifier,GSM8K (math),modifier,HumanEval (code),modifier +Meta,18-Jul-23,llama-2-7B,34.1,5-shot,25.7,8-shot CoT,7.9,0-shot +,,llama-2-13B,47.8,5-shot,77.4,8-shot CoT,14,0-shot +,,llama-2-70B,52.9,5-shot,57.5,8-shot CoT,25.6,0-shot +Google,21-Feb-24,Gemma 7B,64.3,5-shot,46.4,maj@1,32.3,0-shot +Cohere,11-Mar-24,Command-R,59.3,5-shot,,,, +Databricks,27-Mar-24,DBRX,73.7,5-shot,72.8,8-shot CoT,70.1,0-shot +Mistral,17-Apr-24,8x22B,77.7,5-shot,90.8,8-shot CoT,45.1,0-shot +Meta,18-Apr-24,llama-3-8B,68.4,5-shot,79.6,8-shot CoT,62.2,0-shot +,,llama-3-70B,82,5-shot,93,8-shot CoT,71.7,0-shot +Microsoft,22-Apr-24,Phi 3 mini,68.8,5-shot,82.5,0-shot CoT,59.1,0-shot +,,Phi 3 small,75.3,5-shot,88.9,0-shot CoT,59.1,0-shot +,,Phi 3 medium,78.2,5-shot,90.3,0-shot CoT,55.5,0-shot +Snowflake,24-Apr-24,Arctic,67.3,5-shot,74.2,?,64.3,? +DeepSeek,7-May-24,v2,78.5,5-shot,79.2,0-shot CoT,48.8,0-shot +,,,,,,,, +Anthropic,4-Mar-24,Claude 3 Haiku,75.2,5-shot,88.9,0-shot,75.9,0-shot +,,Claude 3 Sonnet,79,5-shot,92.3,0-shot,73,0-shot +,,Claude 3 Opus,86.8,5-shot,95,0-shot,84.9,0-shot +OpenAI,14-Mar-23,GPT 3.5-turbo,70,5-shot,57.1,5-shot CoT,48.1,0-shot +,14-Mar-23,GPT 4,86.4,5-shot,92,5-shot CoT,67,0-shot diff --git a/content/blog/llama3-cohort.md/environmental_impact.csv b/content/blog/llama3-cohort.md/environmental_impact.csv new file mode 100644 index 0000000..41a1d72 --- /dev/null +++ b/content/blog/llama3-cohort.md/environmental_impact.csv @@ -0,0 +1,15 @@ +,Release Date,Name,GPUs,GPU Hours,Power Consumption (W),tCO2eq,FLOPs*,assumed utilization* +Meta,18-Jul-23,llama-2-7B,A100-80GB,"184,320",400,31.22,1.60E+22,30% +,,llama-2-13B,A100-80GB,"368,640",400,62.44,3.10E+22,30% +,,llama-2-70B,A100-80GB,"1,720,320",400,291.42,1.40E+23,30% +Google,21-Feb-24,Gemma 7B,4096 TPUv5e,,,"~131, incl. 
2B models",,
+Cohere,11-Mar-24,Command-R,,,,,,
+Databricks,27-Mar-24,DBRX,3072 H100s,,,,,
+Mistral,17-Apr-24,8x22B,,,,,,
+Meta,18-Apr-24,llama-3-8B,16k H100s,1.3M,700,390,2.40E+23,40%
+,,llama-3-70B,16k H100s,6.4M,700,1900,1.20E+24,40%
+Microsoft,22-Apr-24,Phi 3 mini,,,,,,
+,,Phi 3 small,,,,,,
+,,Phi 3 medium,,,,,,
+Snowflake,24-Apr-24,Arctic,H100s,"~504,000",700,,7.10E+22,30%
+DeepSeek,7-May-24,v2,H800s,"~172,800",700,,,
diff --git a/content/blog/llama3-cohort.md/index.md b/content/blog/llama3-cohort.md/index.md
new file mode 100644
index 0000000..d3465cb
--- /dev/null
+++ b/content/blog/llama3-cohort.md/index.md
@@ -0,0 +1,240 @@
+---
+title: Cohort of Models
+date: 2024-05-11
+authors:
+  - name: ahgraber
+    link: https://github.com/ahgraber
+    image: https://github.com/ahgraber.png
+tags: ["LLMs", "generative ai", "comparison"]
+series: []
+layout: wide
+toc: false
+math: false
+draft: false
+---
+The "open weights" or "open model" LLM ecosystem is thriving,
+with major new releases from Meta, Microsoft, Databricks, and Snowflake in the past two months.
+Given that Meta's Llama 2 family became the standard point of comparison for all other models,
+I thought it would be useful to aggregate all of the information about the 'Llama 3 cohort' in a single place.
+I've included Llama 2 as a point of comparison. Models are ordered by date of introduction.
+
+{{< callout type="question" emoji="📣" >}}
+  I've done my best to make these tables not-terrible,
+  but they're probably still trash on mobile or non-wide aspect ratios.
+  Sorry.
+{{< /callout >}}
+
+## Architectures
+
+{{< table path="architectures.csv" header="true" caption="A comparison of model architectures" >}}
+
+{{< callout type="warning" >}}
+  I've not included ChatGPT 3.5 or 4, or Anthropic's Claude because they're completely closed models
+  and I was unable to find any information worth comparing.
+{{< /callout >}}
+
+## Performance
+
+{{< callout type="question" emoji="📣" >}}
+
+  1. I've transposed table rows/columns from here on (they fit better that way).
+  2. I've included benchmarks from OpenAI's GPT-4 paper and Anthropic's Claude 3 announcement as points of comparison.
+{{< /callout >}}
+
+{{< table path="benchmarks.csv" header="true" caption="A comparison of benchmark performance" >}}
+
+## Environmental impact
+
+{{< table path="environmental_impact.csv" header="true" caption="Environmental impacts of training (but not inference)" >}}
+
+\* FLOPs and % utilization are estimated using Epoch AI's online tool.[^compute]
+
+## Observations
+
+In doing the research and aggregation for these tables,
+I read, and reread, and pored over the papers describing the technical details of these models.
+
+The TLDR? All changes have been made with inference efficiency in mind.
+
+### Data matters
+
+Almost every paper makes a statement about how they cleaned, filtered, and optimized their data;
+several (Llama 3, Phi 3) use LLMs in this process.
+Some (the Phi 3 models in particular) also use LLMs to generate "high quality synthetic" data.
+Databricks notes in their DBRX announcement that the dataset used to train DBRX is 2x as good as
+the dataset they used in 2023; that is, "half as many tokens are necessary to reach the same model quality."[^dbrx]
+Concretely, curation of training data removes the low-information sequences that require compute to process
+but minimally improve model performance.
+
+Additionally, models are trained on more tokens,
+moving from the compute-optimal point toward the data-optimal frontier of the Chinchilla laws.
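+
+To put rough numbers on that shift, here's a back-of-the-envelope comparison using the common `C ≈ 6 * N * D`
+approximation for training compute (N = parameters, D = tokens). The configurations below are illustrative
+sketches, not figures pulled from any of these papers:
+
+```python
+# Approximate training compute: C ≈ 6 * N * D (N = parameters, D = training tokens).
+# Illustrative configurations only -- not taken from any model card.
+
+def train_flops(params: float, tokens: float) -> float:
+    """Rough training compute in FLOPs."""
+    return 6 * params * tokens
+
+# A Chinchilla-style "compute optimal" run: larger model, ~20 tokens per parameter
+compute_optimal = train_flops(params=70e9, tokens=1.4e12)
+
+# A "data optimal" / inference-efficient run: smaller model, many more tokens
+data_optimal = train_flops(params=8e9, tokens=15e12)
+
+print(f"70B on 1.4T tokens: {compute_optimal:.1e} FLOPs")  # ~5.9e+23
+print(f" 8B on  15T tokens: {data_optimal:.1e} FLOPs")     # ~7.2e+23
+# Similar training budgets, but the smaller model is ~9x cheaper to serve per token.
+```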
+
+Microsoft explicitly states this shift in the Phi 3 technical paper,
+saying they "calibrate the training data to be closer to the 'data optimal' regime for small models."[^phi]
+
+{{< callout type="info" emoji="💡" >}}
+  The Chinchilla "laws" describe the relationship between model size (parameters),
+  dataset size (tokens), performance (loss), and cost of compute (FLOPs).
+  Historically, the Chinchilla laws have been used to understand the "compute optimal" point,
+  or the point of diminishing returns (in terms of model performance improvement) associated with additional compute,
+  given a static model size and dataset.
+{{< /callout >}}
+
+Why is this? Well, it turns out that inference is expensive. _Really expensive_.
+Like, OpenAI-might-go-bankrupt-because-it-costs-$700,000-per-day expensive.[^cost]
+LLM providers want efficiency, but they also need to show improvements over the last generation.
+To do this, they have transitioned toward the "data optimal" (or, more importantly, inference-efficient) training paradigm.
+The Chinchilla laws suggest that you can reach the same performance as a "compute optimal" model by training a smaller model for longer on more tokens.[^law]
+The Llama 3 cohort trains models the same size as the prior generation for longer on many more (and more informative) tokens,
+demonstrating generational improvements without increasing model sizes (and therefore without increasing inference requirements).
+
+### Attention on attention
+
+Aside from optimizations to Attention, there have been minimal changes to the Transformer architecture.
+All models use RoPE for their positional embeddings, almost all use RMSNorm,
+and the majority use SwiGLU as the activation function in their feedforward modules.
+
+Almost all of the new models adopt approaches that optimize Attention,
+including Multi-Query Attention (MQA)[^mqa], Grouped-Query Attention (GQA)[^gqa],
+Sliding Window Attention (SWA)[^swa], and even attention sinks[^snk]!
+Most models use GQA, while Mistral retains its use of SWA from its prior generation, and Phi 3 experiments with SWA as well.
+Snowflake shared that they are "developing an attention-sinks-based sliding window implementation
+to support unlimited sequence generation capability" in their Arctic release announcement.[^snow]
+DeepSeek just released their v2, which introduces a novel Multi-head Latent Attention (MLA) mechanism.[^mla]
+
+Why the focus on Attention? The Attention mechanism scales poorly with sequence length.
+As we increase the context length that LLMs can handle, we massively increase the compute and memory required
+to allow each token to attend to every other token (quadratic scaling).
+While Flash Attention[^flash] and KV caching have reduced this worst-case quadratic scaling,
+Attention is still incredibly memory-hungry: KV caching trades linear compute scaling (good!) for increased memory requirements (bad!) at inference time.
+The aforementioned Attention optimizations focus primarily on reducing the memory used by Attention, especially during inference.
+
+### Expert expectations
+
+Four models in the Llama 3 cohort use a Mixture of Experts (MoE) design;
+Snowflake Arctic and DeepSeek-v2 use novel MoE architectures, while Mistral 8x22B and DBRX use a more standard sparse MoE.
+Mixture of Experts models alter the feed-forward component of a standard Transformer, turning the dense feed-forward network
+into a larger, sparse one.
+These wider feed-forward nets are grouped into "experts", and a router determines which experts are active on a per-token basis.
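+
+Concretely, the router means only a few experts run for any given token, even though all of them must be kept
+in memory. Here's a toy parameter count; the dimensions are invented for illustration and don't correspond to
+any particular model:
+
+```python
+# Toy count of feed-forward parameters in a dense vs. MoE transformer block.
+# Dimensions are illustrative assumptions, not taken from any model card.
+
+d_model = 4096     # hidden dimension
+d_ff = 14336       # feed-forward dimension
+n_experts = 8      # experts per MoE layer
+top_k = 2          # experts the router activates per token
+
+# A SwiGLU-style FFN has three weight matrices (up, gate, down)
+dense_ffn = 3 * d_model * d_ff
+
+moe_total = n_experts * dense_ffn   # every expert must sit in memory
+moe_active = top_k * dense_ffn      # only the routed experts run for a given token
+
+print(f"dense FFN params/layer:        {dense_ffn / 1e9:.2f}B")   # 0.18B
+print(f"MoE FFN params/layer (total):  {moe_total / 1e9:.2f}B")   # 1.41B
+print(f"MoE FFN params/layer (active): {moe_active / 1e9:.2f}B")  # 0.35B
+# Memory footprint scales with the total; per-token compute scales with the active subset.
+```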
+
+In theory, MoE allows the model to have a much higher total parameter count while keeping per-token compute
+proportional to the much smaller _active_ parameter count (see `Parameters (active)` in the top table above).
+MoE LLMs are therefore kind of a cheat against the Chinchilla laws;
+they tend to train quickly (i.e., with less compute) and have very good performance (on benchmarks) relative to dense models of equivalent _active_ parameter counts.
+The sparsity of MoE also means they tend to be fast at inference.
+
+The downside?
+While MoE models are _compute_ efficient because only a portion of the sparse feed-forward is active at any one time,
+they have larger memory requirements because all of the parameters (even the inactive ones) have to be loaded into memory
+for inference.
+
+As an aside, I have to wonder about the use of Mixture-of-Experts as a way to increase model size while maintaining or improving inference speed.
+Recent research on pruning dense models indicates LLMs are already overparameterized.[^short] [^prune]
+These MoE models seem to be even further overparameterized than their dense peers.
+Both Databricks DBRX and Snowflake Arctic are MoE models with significantly more total _and_ active parameters than Llama 3 8B,
+and both trained on fewer tokens.
+DBRX is closer (12T for DBRX vs. 15T for Llama 3 8B). Snowflake Arctic trained on fewer than 4T,
+though this may be because Arctic is intended more for "enterprise" use cases than as a general language model
+(the lack of MMLU scores reported for Arctic adds weight to this theory).
+Perhaps the additional overparameterization helps to explain their training speed and performance,
+while the sparsity associated with the expert routing is akin to pruning a dense network?
+
+### Inference implications
+
+{{< callout type="question" emoji="📣" >}}
+  Most of what I'll mention here is a rehash of what I learned listening to [Dylan Patel (of SemiAnalysis) on the Latent Space podcast](https://www.latent.space/p/semianalysis).
+  It is 100% worth the listen (or reviewing the transcript).
+
+  In fact, go do that now. I'll wait.
+{{< /callout >}}
+
+As Dylan Patel pointed out, training LLMs tends to be compute-intensive; compute FLOPs are the bottleneck.
+You get as close to 100% utilization (MFU - model FLOPs utilization) as you can
+(given network/communication and sharding inefficiencies) during training.
+At inference time, you need roughly a third of the per-token compute used in training (you only need the forward pass, no backpropagation).
+If you want to get more responses to more prompts, you need memory.
+If you want to provide longer answers or accept longer input contexts, you need memory.
+And as I discussed earlier, Attention is memory hungry; it scales approximately quadratically with sequence length.
+So all of a sudden, you're limited by memory bandwidth (MBU - _memory_ bandwidth utilization).
+
+Now we see why AI engineers made the choices they did for the Llama 3 cohort.
+MoE lets us take advantage of a larger GPU cluster's total memory and spread the memory bandwidth demand across more individual GPUs.
+Attention optimizations reduce the memory required at a given sequence length, reducing MBU requirements.
+Due to the data-optimal paradigm shift, model sizes have (generally) not increased,
+meaning we get better performance on today's infrastructure, or we can retain current performance with smaller models...
+reducing MBU requirements. Everything comes back to efficiency in inference.
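+
+For a rough sense of scale on that memory pressure, here's a simple KV-cache estimate comparing vanilla
+multi-head attention to GQA. The dimensions are illustrative assumptions (fp16 cache, batch size 1), not a
+claim about any specific model's implementation:
+
+```python
+# Rough KV-cache size: 2 (K and V) * layers * kv_heads * head_dim * seq_len * bytes.
+# Illustrative dimensions; assumes an fp16 cache (2 bytes/value) and batch size 1.
+
+def kv_cache_gib(layers: int, kv_heads: int, head_dim: int, seq_len: int, bytes_per_value: int = 2) -> float:
+    return 2 * layers * kv_heads * head_dim * seq_len * bytes_per_value / 2**30
+
+layers, q_heads, head_dim, seq_len = 32, 32, 128, 8192
+
+mha = kv_cache_gib(layers, kv_heads=q_heads, head_dim=head_dim, seq_len=seq_len)  # every head caches K/V
+gqa = kv_cache_gib(layers, kv_heads=8, head_dim=head_dim, seq_len=seq_len)        # 8 KV groups shared by 32 query heads
+
+print(f"MHA cache @ 8k context: {mha:.1f} GiB")  # ~4.0 GiB
+print(f"GQA cache @ 8k context: {gqa:.1f} GiB")  # ~1.0 GiB
+# The cache grows linearly with context length and batch size --
+# exactly the memory-bandwidth pressure described above.
+```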
+
+### Frustrations and concerns
+
+Over the course of researching this post, I ran into some frustrations:
+
+First, technical papers do not always include details of the model architectures,
+either leaving them entirely unsaid or referring the reader to "prior work".
+This is lazy. Since the reader _does_ have access to the information in the prior work,
+the authors are not hiding anything per se, but they are making the reader chase down the unspecified pieces of information.
+It also reduces replicability (not that replication is possible anyway, given these models are generally open weights
+and do not share training code or source datasets... but I digress)
+and invites assumptions that may not be accurate if the wrong prior work is referenced,
+or if the current implementation makes slight deviations from the prior work
+that were glossed over or forgotten during publication.
+As a side note, I found HuggingFace's model catalog[^huggingface] to be invaluable in filling in many of the details,
+especially around hidden and feed-forward dimensions and attention head and layer counts.
+
+Second, reporting of performance benchmarks is unstandardized.
+Across the papers, there was high variance in benchmark specificity, meaning the reported results are likely _not_ directly comparable --
+and this is before considering any shenanigans in the _implementation_ of the benchmarks.[^openllm] [^benchmarks]
+MMLU might be measured with English-only questions, or on subsets of the benchmark,
+or used to demonstrate multilingual capabilities.
+GSM8K evaluation might use 8-shot chain-of-thought, 5-shot chain-of-thought, 0-shot, or majority voting.
+Further, it's often unclear whether the reported benchmarks are run on the model after pretraining,
+after fine-tuning, or after alignment (or some combination that provides the best results).
+So, take all benchmarks with a grain of salt... which, doesn't that kind of invalidate the raison d'etre of a benchmark?
+
+Finally, I have to credit Meta for revealing training cost in terms of GPU hours _and_ energy implications (tons of CO2 equivalent).
+It is frustrating to me that discussing the energy-grid and environmental implications of training costs remains nonstandard.
+I would like to see all technical papers discuss the training costs/requirements for their models,
+even if they do not release the source code to replicate them.
+GPU class, GPU time, FLOPs, and tCO2eq are all important metrics for understanding the energy requirements of training a model.
+Further, I would also like to see papers report the cost of inference per 1M tokens in the same manner:
+GPUs required, GPU class, FLOPs, and tCO2eq.
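+
+None of this seems hard to estimate from numbers the labs already have. As a minimal sketch of the kind of
+accounting I mean (the carbon-intensity factor below is my assumption, roughly back-solved from Meta's
+reported figures, not a universal constant):
+
+```python
+# Rough training-footprint accounting from disclosed GPU hours:
+# energy ≈ GPU-hours * per-GPU power; emissions ≈ energy * grid carbon intensity.
+# The 0.423 kgCO2eq/kWh intensity is an assumption, not a reported value.
+
+def training_footprint(gpu_hours: float, gpu_watts: float, kg_co2_per_kwh: float = 0.423):
+    energy_kwh = gpu_hours * gpu_watts / 1000
+    tco2eq = energy_kwh * kg_co2_per_kwh / 1000
+    return energy_kwh, tco2eq
+
+# Llama 2 70B: 1,720,320 A100 hours at 400 W (from the table above)
+energy, emissions = training_footprint(1_720_320, 400)
+print(f"{energy:,.0f} kWh, ~{emissions:.0f} tCO2eq")  # 688,128 kWh, ~291 tCO2eq
+```
+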
+ +## References + +- [Models - Hugging Face](https://huggingface.co/models) +- [[2302.13971] LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) +- [[2307.09288] Llama 2: Open Foundation and Fine-Tuned Chat Models](https://arxiv.org/abs/2307.09288) +- [Introducing Meta Llama 3: The most capable openly available LLM to date](https://ai.meta.com/blog/meta-llama-3/) +- [gemma-report.pdf](https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf) +- [[2310.06825] Mistral 7B](https://arxiv.org/abs/2310.06825) +- [[2401.04088] Mixtral of Experts](https://arxiv.org/abs/2401.04088) +- [Cheaper, Better, Faster, Stronger | Mistral AI | Frontier AI in your hands](https://mistral.ai/news/mixtral-8x22b/) +- [mistralai/mistral-src: Reference implementation of Mistral AI 7B v0.1 model.](https://github.com/mistralai/mistral-src) +- [databricks/dbrx: Code examples and resources for DBRX, a large language model developed by Databricks](https://github.com/databricks/dbrx) +- [Snowflake-Labs/snowflake-arctic](https://github.com/Snowflake-Labs/snowflake-arctic) +- [Snowflake Arctic Cookbook Series: Arctic's Approach to Data | by Snowflake AI Research | Snowflake Builders Blog: Data Engineers, App Developers, AI/ML, & Data Science | Apr, 2024 | Medium](https://medium.com/snowflake/snowflake-arctic-cookbook-series-arctics-approach-to-data-b81a8a0958bd) +- [[2309.05463] Textbooks Are All You Need II: phi-1.5 technical report](https://arxiv.org/abs/2309.05463) +- [[2306.11644] Textbooks Are All You Need](https://arxiv.org/abs/2306.11644) +- [Command R: RAG at Production Scale](https://cohere.com/blog/command-r) +- [Introducing the next generation of Claude \ Anthropic](https://www.anthropic.com/news/claude-3-family) +- [Model_Card_Claude_3.pdf](https://www-cdn.anthropic.com/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627/Model_Card_Claude_3.pdf) +- [Zhen Wang on LinkedIn: #anthropic #claude #tokenizer #llm](https://www.linkedin.com/posts/zhenwang_anthropic-claude-tokenizer-activity-7067072872019619840-hZ-7) +- [GPT-4 | OpenAI](https://openai.com/index/gpt-4-research/) +- [[2303.08774] GPT-4 Technical Report](https://arxiv.org/abs/2303.08774) +- [Andrej Karpathy on X: "Congrats to @AIatMeta on Llama 3 release!! 
🎉](https://twitter.com/karpathy/status/1781028605709234613) + +## Works Cited + +[^compute]: [Estimating Training Compute of Deep Learning Models – Epoch AI](https://epochai.org/blog/estimating-training-compute) +[^dbrx]: [Introducing DBRX: A New State-of-the-Art Open LLM | Databricks Blog](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) +[^phi]: [[2404.14219] Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone](https://arxiv.org/abs/2404.14219) +[^cost]: [The Inference Cost Of Search Disruption – Large Language Model Cost Analysis](https://www.semianalysis.com/p/the-inference-cost-of-search-disruption) +[^law]: [Revised Chinchilla scaling laws – LLM compute and token requirements – Educating Silicon](https://www.educatingsilicon.com/2024/04/29/revised-chinchilla-scaling-laws-impact-on-llm-compute-and-token-requirements/) +[^mqa]: [[1911.02150] Fast Transformer Decoding: One Write-Head is All You Need](https://arxiv.org/abs/1911.02150) +[^gqa]: [[2305.13245] GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints](https://arxiv.org/abs/2305.13245) +[^swa]: [[2004.05150v2] Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150v2) +[^snk]: [[2309.17453] Efficient Streaming Language Models with Attention Sinks](https://arxiv.org/abs/2309.17453) +[^snow]: [Snowflake Arctic - LLM for Enterprise AI](https://www.snowflake.com/blog/arctic-open-efficient-foundation-language-models-snowflake/) +[^mla]: [[2405.04434v2] DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model](https://arxiv.org/abs/2405.04434v2) +[^flash]: [[2205.14135] FlashAttention: Fast and Memory-Efficient Exact Attention with IO-Awareness](https://arxiv.org/abs/2205.14135) +[^short]: [[2403.03853] ShortGPT: Layers in Large Language Models are More Redundant Than You Expect](https://arxiv.org/abs/2403.03853) +[^prune]: [[2306.11695] A Simple and Effective Pruning Approach for Large Language Models](https://arxiv.org/abs/2306.11695) +[^huggingface]: [Models - Hugging Face](https://huggingface.co/models) +[^openllm]: [What's going on with the Open LLM Leaderboard?](https://huggingface.co/blog/open-llm-leaderboard-mmlu) +[^benchmarks]: [[2402.01781v1] When Benchmarks are Targets: Revealing the Sensitivity of Large Language Model Leaderboards](https://arxiv.org/abs/2402.01781v1?trk=public_post_comment-text) diff --git a/content/blog/no-mo-robo/index.md b/content/blog/no-mo-robo/index.md index fd9e51f..c34a1a0 100644 --- a/content/blog/no-mo-robo/index.md +++ b/content/blog/no-mo-robo/index.md @@ -1,12 +1,19 @@ --- title: No Mo Robo date: 2024-05-03 +authors: + - name: ahgraber + link: https://github.com/ahgraber + image: https://github.com/ahgraber.png tags: - 'opinion' - 'generative AI' - 'LLMs' - 'copyright' series: [] +layout: single +toc: true +math: false draft: false --- ## Web crawlers & search diff --git a/content/blog/the-compounding-error-of-generative-models/index.md b/content/blog/the-compounding-error-of-generative-models/index.md index 7043972..8633892 100644 --- a/content/blog/the-compounding-error-of-generative-models/index.md +++ b/content/blog/the-compounding-error-of-generative-models/index.md @@ -1,12 +1,18 @@ --- title: The Compounding Error of Generative Models date: 2024-04-30 +authors: + - name: ahgraber + link: https://github.com/ahgraber + image: https://github.com/ahgraber.png tags: - 'agents' - 'generative AI' - 'prompts' - 'LLMs' series: [] +layout: single +toc: true math: true 
draft: false --- diff --git a/layouts/blog/wide.html b/layouts/blog/wide.html new file mode 100644 index 0000000..0ba01e1 --- /dev/null +++ b/layouts/blog/wide.html @@ -0,0 +1,54 @@ +{{ define "main" }} +
{{ . | markdownify | emojify }} | {{ end }} +|
---|---|
{{ . }} | + {{ else }} +{{ . | markdownify | emojify }} | + {{ end }} + {{ end }} +