Skip to content

Commit

Permalink
feat(k8s): add litellm (#20346)
Browse files Browse the repository at this point in the history
  • Loading branch information
hongbo-miao authored Nov 10, 2024
1 parent bc4435a commit ebaba23
Show file tree
Hide file tree
Showing 12 changed files with 321 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -691,3 +691,27 @@ module "kubernetes_namespace_hm_kafbat_ui" {
module.amazon_eks_cluster
]
}

# LiteLLM
# LiteLLM - IAM role
# Instantiates the local hm_litellm_iam_role module against the production AWS
# provider alias.
module "hm_litellm_iam_role" {
  source    = "../../../../modules/kubernetes/hm_litellm_iam_role"
  providers = { aws = aws.production }

  # Kubernetes identity the role is created for. The namespace is the same
  # "<environment>-hm-litellm" string used by the namespace module in this file.
  litellm_namespace            = "${var.environment}-hm-litellm"
  litellm_service_account_name = "hm-litellm-service-account"

  # OIDC provider details exported by the EKS cluster module.
  amazon_eks_cluster_oidc_provider     = module.amazon_eks_cluster.oidc_provider
  amazon_eks_cluster_oidc_provider_arn = module.amazon_eks_cluster.oidc_provider_arn

  # Passed through unchanged from this stack's own variables.
  environment = var.environment
  team        = var.team
}
# LiteLLM - Kubernetes namespace
# Creates the "<environment>-hm-litellm" namespace through the shared namespace
# module, labelled for Goldilocks, once the EKS cluster module has been applied.
module "hm_kubernetes_namespace_hm_litellm" {
  source               = "../../../../modules/kubernetes/hm_kubernetes_namespace"
  kubernetes_namespace = "${var.environment}-hm-litellm"
  labels               = { "goldilocks.fairwinds.com/enabled" = "true" }
  depends_on           = [module.amazon_eks_cluster]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Provider requirements for this module: only the HashiCorp AWS provider.
# No version constraint is declared here.
terraform {
  required_providers {
    aws = { source = "hashicorp/aws" }
  }
}

# Common prefix used when naming the IAM role and its inline policy.
locals { aws_iam_role_name_prefix = "LiteLLMRole" }
# IAM role assumed through the EKS cluster's OIDC provider (web identity).
# The trust policy only allows tokens whose audience is sts.amazonaws.com and
# whose subject is the given service account in the given namespace.
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role
resource "aws_iam_role" "hm_litellm_iam_role" {
  name = "${local.aws_iam_role_name_prefix}-${var.litellm_service_account_name}"

  tags = {
    Name        = "${local.aws_iam_role_name_prefix}-${var.litellm_service_account_name}"
    Environment = var.environment
    Team        = var.team
  }

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = "sts:AssumeRoleWithWebIdentity"
        Principal = {
          Federated = var.amazon_eks_cluster_oidc_provider_arn
        }
        Condition = {
          StringEquals = {
            # Subject claim: exactly one Kubernetes service account.
            "${var.amazon_eks_cluster_oidc_provider}:sub" = "system:serviceaccount:${var.litellm_namespace}:${var.litellm_service_account_name}"
            # Audience claim.
            "${var.amazon_eks_cluster_oidc_provider}:aud" = "sts.amazonaws.com"
          }
        }
      }
    ]
  })
}
# Inline policy attached to the LiteLLM role: allows invoking (plain and
# streaming) only the Anthropic Claude foundation models listed below, in any
# region ("*" in the region field of each ARN).
# https://docs.aws.amazon.com/bedrock/latest/userguide/security_iam_id-based-policy-examples.html#security_iam_id-based-policy-examples-perform-actions-pt
# https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
# NOTE(review): the next link documents an ACME/Route53 policy, not Bedrock;
# presumably kept as a least-privilege example - confirm it is still wanted.
# https://registry.terraform.io/providers/vancluever/acme/latest/docs/guides/dns-providers-route53#least-privilege-policy-for-production-purposes
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy
resource "aws_iam_role_policy" "hm_litellm_iam_role_policy" {
  role = aws_iam_role.hm_litellm_iam_role.name
  name = "${local.aws_iam_role_name_prefix}Policy-${var.litellm_service_account_name}"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Resource = [
          # Claude Haiku
          "arn:aws:bedrock:*::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
          "arn:aws:bedrock:*::foundation-model/anthropic.claude-3-haiku-20240307-v1:0",
          # Claude Opus
          "arn:aws:bedrock:*::foundation-model/anthropic.claude-3-opus-20240229-v1:0",
          # Claude Sonnet
          "arn:aws:bedrock:*::foundation-model/anthropic.claude-3-5-sonnet-20241022-v2:0",
          "arn:aws:bedrock:*::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0"
        ]
        Action = [
          "bedrock:InvokeModel",
          "bedrock:InvokeModelWithResponseStream"
        ]
      }
    ]
  })
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Input variables of the LiteLLM IAM role module. All are required strings;
# the descriptions state how each value is used by the role and policy
# resources in this module.
variable "litellm_service_account_name" {
  type        = string
  description = "Name of the Kubernetes service account allowed to assume the role; also used as the suffix of the IAM role and policy names."
}
variable "litellm_namespace" {
  type        = string
  description = "Kubernetes namespace of the service account; used in the OIDC subject condition of the role's trust policy."
}
variable "amazon_eks_cluster_oidc_provider" {
  type        = string
  description = "OIDC provider identifier of the EKS cluster; used as the prefix of the ':aud' and ':sub' condition keys in the trust policy."
}
variable "amazon_eks_cluster_oidc_provider_arn" {
  type        = string
  description = "ARN of the EKS cluster's IAM OIDC provider; used as the federated principal of the trust policy."
}
variable "environment" {
  type        = string
  description = "Value of the Environment tag on the IAM role."
}
variable "team" {
  type        = string
  description = "Value of the Team tag on the IAM role."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Register the LiteLLM Argo CD application from its manifest.
argo-cd-app-create:
	argocd app create production-hm-litellm --file=manifests/hm-litellm-application.yaml

# Same command with --upsert, so an existing application is updated in place.
argo-cd-app-update:
	argocd app create production-hm-litellm --file=manifests/hm-litellm-application.yaml --upsert

# Delete the application; --yes skips the confirmation prompt.
argo-cd-app-delete:
	argocd app delete production-hm-litellm --yes
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
# Argo CD Application that syncs the LiteLLM manifests from the main branch of
# the Git repository into the production-hm-litellm namespace of the cluster
# Argo CD itself runs in.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: production-hm-litellm
  namespace: production-hm-argo-cd
  labels:
    app.kubernetes.io/name: hm-litellm
spec:
  project: production-hm
  source:
    # SSH clone URL of the repository (user "git" at host github.com).
    repoURL: git@github.com:hongbo-miao/hongbomiao.com.git
    targetRevision: main
    path: kubernetes/argo-cd/applications/production-hm/litellm/kubernetes-manifests
  destination:
    namespace: production-hm-litellm
    server: https://kubernetes.default.svc
  syncPolicy:
    syncOptions:
      - ServerSideApply=true
    # Automated sync; resources removed from Git are pruned from the cluster.
    automated:
      prune: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---
# LiteLLM proxy configuration, mounted into the Deployment as /app/config.yaml.
# Every model_list entry maps a short public model name to a model served by
# Amazon Bedrock; the "bedrock/" provider prefix on litellm_params.model is
# what routes the request to Bedrock, so all entries must carry it.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hm-litellm-config-map
  namespace: production-hm-litellm
  annotations:
    # https://argo-cd.readthedocs.io/en/stable/user-guide/resource_hooks
    argocd.argoproj.io/hook: PreSync
  labels:
    app.kubernetes.io/name: hm-litellm-config-map
    app.kubernetes.io/part-of: production-hm-litellm
data:
  config.yaml: |
    # https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
    model_list:
      # Claude Haiku
      - model_name: claude-3-5-haiku
        litellm_params:
          model: bedrock/anthropic.claude-3-5-haiku-20241022-v1:0
      # Claude Opus
      - model_name: claude-3-opus
        litellm_params:
          model: bedrock/anthropic.claude-3-opus-20240229-v1:0
      # Claude Sonnet
      - model_name: claude-3-5-sonnet
        litellm_params:
          model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
    litellm_settings:
      modify_params: True
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
---
# Single-replica LiteLLM proxy. The pod runs as hm-litellm-service-account and
# reads its configuration from the hm-litellm-config-map ConfigMap, mounted as
# the single file /app/config.yaml.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hm-litellm-deployment
  namespace: production-hm-litellm
  labels:
    app.kubernetes.io/name: hm-litellm-deployment
    app.kubernetes.io/part-of: production-hm-litellm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: hm-litellm
  template:
    metadata:
      labels:
        app: hm-litellm
    spec:
      serviceAccountName: hm-litellm-service-account
      volumes:
        - name: litellm-volume
          configMap:
            name: hm-litellm-config-map
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:main-v1.52.3
          # NOTE(review): --detailed_debug presumably enables verbose logging;
          # confirm that is intended for a production namespace.
          command:
            - litellm
            - --port
            - "4000"
            - --config
            - /app/config.yaml
            - --detailed_debug
          ports:
            # Named port; the Service targets it by name.
            - name: litellm
              containerPort: 4000
              protocol: TCP
          volumeMounts:
            # subPath mounts only config.yaml instead of the whole volume.
            - name: litellm-volume
              mountPath: /app/config.yaml
              subPath: config.yaml
          resources:
            requests:
              cpu: 50m
              memory: 512Mi
            limits:
              cpu: 100m
              memory: 1Gi
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
# Traefik ingress for hm-litellm.internal.hongbomiao.com: sends every path to
# hm-litellm-service on port 80 and terminates TLS with the secret
# production-hm-litellm-certificate. Applied as an Argo CD PostSync hook.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: hm-litellm-ingress
  namespace: production-hm-litellm
  labels:
    app.kubernetes.io/name: hm-litellm-ingress
    app.kubernetes.io/part-of: production-hm-litellm
  annotations:
    kubernetes.io/ingress.class: traefik
    # https://doc.traefik.io/traefik/routing/providers/kubernetes-ingress/#on-ingress
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
    # https://kubernetes-sigs.github.io/external-dns/latest/annotations/annotations
    external-dns.alpha.kubernetes.io/hostname: hm-litellm.internal.hongbomiao.com
    # https://argo-cd.readthedocs.io/en/stable/user-guide/resource_hooks
    argocd.argoproj.io/hook: PostSync
spec:
  tls:
    - secretName: production-hm-litellm-certificate
      hosts:
        - hm-litellm.internal.hongbomiao.com
  rules:
    - host: hm-litellm.internal.hongbomiao.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: hm-litellm-service
                port:
                  number: 80
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# Service account used by the LiteLLM Deployment. The role-arn annotation
# associates it with an IAM role whose name follows the
# "LiteLLMRole-<service account name>" pattern.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: hm-litellm-service-account
  namespace: production-hm-litellm
  labels:
    app.kubernetes.io/name: hm-litellm-service-account
    app.kubernetes.io/part-of: production-hm-litellm
  annotations:
    # https://docs.aws.amazon.com/eks/latest/userguide/associate-service-account-role.html
    eks.amazonaws.com/role-arn: arn:aws:iam::272394222652:role/LiteLLMRole-hm-litellm-service-account
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
# ClusterIP service exposing pods labelled app=hm-litellm on port 80,
# forwarding to the container port named "litellm".
apiVersion: v1
kind: Service
metadata:
  name: hm-litellm-service
  namespace: production-hm-litellm
  labels:
    app.kubernetes.io/name: hm-litellm-service
    app.kubernetes.io/part-of: production-hm-litellm
spec:
  type: ClusterIP
  selector:
    app: hm-litellm
  ports:
    - name: litellm
      port: 80
      # Refers to the container port by name rather than by number.
      targetPort: litellm
      protocol: TCP
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# https://cert-manager.io/docs/usage/certificate
---
# Certificate for hm-litellm.internal.hongbomiao.com issued by the production
# Let's Encrypt ClusterIssuer. It is stored in the secret
# production-hm-litellm-certificate and applied as an Argo CD PreSync hook.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: production-hm-litellm-certificate
  namespace: production-hm-litellm
  annotations:
    # https://argo-cd.readthedocs.io/en/stable/user-guide/resource_hooks
    argocd.argoproj.io/hook: PreSync
spec:
  secretName: production-hm-litellm-certificate
  commonName: hm-litellm.internal.hongbomiao.com
  dnsNames:
    - hm-litellm.internal.hongbomiao.com
  issuerRef:
    name: production-lets-encrypt-cluster-issuer
    kind: ClusterIssuer
  # https://letsencrypt.org/certificates
  privateKey:
    algorithm: ECDSA
    size: 384
    rotationPolicy: Always
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# https://cert-manager.io/docs/usage/certificate
---
# Certificate for the same host, issued by the staging Let's Encrypt
# ClusterIssuer and stored in its own secret, staging-hm-litellm-certificate.
# Applied as an Argo CD PreSync hook.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: staging-hm-litellm-certificate
  namespace: production-hm-litellm
  annotations:
    # https://argo-cd.readthedocs.io/en/stable/user-guide/resource_hooks
    argocd.argoproj.io/hook: PreSync
spec:
  secretName: staging-hm-litellm-certificate
  commonName: hm-litellm.internal.hongbomiao.com
  dnsNames:
    - hm-litellm.internal.hongbomiao.com
  issuerRef:
    name: staging-lets-encrypt-cluster-issuer
    kind: ClusterIssuer
  # https://letsencrypt.org/certificates
  privateKey:
    algorithm: ECDSA
    size: 384
    rotationPolicy: Always

0 comments on commit ebaba23

Please sign in to comment.