vllm-project · Copilot · Oct 15, 2025 · Oct 15, 2025 · Oct 15, 2025 · Oct 15, 2025
@@ -0,0 +1,169 @@
+# Example Router Configuration with Signal Fusion Engine
+# This is a complete example showing how to integrate the fusion engine
+
+# BERT model settings (unchanged from the existing router configuration)
+bert_model:
+  # Quoted to match the other model_id values in this file
+  model_id: "sentence-transformers/all-MiniLM-L12-v2"
+  use_cpu: true
+  threshold: 0.6
+
+# Classifier models (unchanged from the existing router configuration)
+classifier:
+  # Category classifier and the JSON file holding its category mapping
+  category_model:
+    model_id: "models/classifier_modernbert-base_model"
+    category_mapping_path: "models/classifier_modernbert-base_model/category_mapping.json"
+    use_modernbert: true
+    use_cpu: true
+    threshold: 0.5
+
+  # PII classifier and the JSON file holding its PII type mapping
+  pii_model:
+    model_id: "models/pii_classifier_modernbert-base_model"
+    pii_mapping_path: "models/pii_classifier_modernbert-base_model/pii_type_mapping.json"
+    use_cpu: true
+    threshold: 0.7
+
+# Prompt guard settings (unchanged from the existing router configuration)
+prompt_guard:
+  enabled: true
+  # Jailbreak classifier model and the JSON file holding its type mapping
+  model_id: "models/jailbreak_classifier_modernbert-base_model"
+  jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
+  use_modernbert: true
+  use_cpu: true
+  threshold: 0.7
+
+# ============================================================
+# NEW: Content Scanning and Signal Fusion Configuration
+# ============================================================
+
+content_scanning:
+  # Master switch for the fusion engine
+  enabled: true
+
+  # Record policy decisions in the audit log
+  audit_logging: true
+
+  # Action taken when no rules match
+  # Options: "fallthrough" (use BERT), "block"
+  default_action: "fallthrough"
+
+  # Signal providers; each one loads its definitions from its own file
+  providers:
+    # Keyword matching provider (in-tree, low latency)
+    keyword:
+      enabled: true
+      rules_path: "config/fusion/keyword_rules.yaml"
+
+    # Regex scanning provider (in-tree, low latency)
+    regex:
+      enabled: true
+      # Options: "re2" (recommended), "stdlib"
+      engine: "re2"
+      patterns_path: "config/fusion/regex_patterns.yaml"
+
+    # Embedding similarity provider (in-tree or MCP)
+    similarity:
+      enabled: true
+      default_threshold: 0.75
+      concepts_path: "config/fusion/similarity_concepts.yaml"
+
+  # Fusion policy - combines all signals into routing decisions
+  fusion_policy:
+    # Option 1: Load rules from external file (recommended for production)
+    rules_path: "config/fusion/policy_rules.yaml"
+
+    # Option 2: Inline rules (good for simple configurations)
+    # rules:
+    #   - name: "block-pii"
+    #     condition: "regex.ssn.matched || regex.credit-card.matched"
+    #     action: "block"
+    #     priority: 200
+    #     message: "PII detected"
+    #
+    #   - name: "route-k8s"
+    #     condition: "keyword.kubernetes-infrastructure.matched && similarity.infrastructure.score > 0.75"
+    #     action: "route"
+    #     priority: 150
+    #     models: ["k8s-expert", "devops-model"]
+    #
+    #   - name: "default"
+    #     condition: "!regex.ssn.matched && !regex.credit-card.matched"
+    #     action: "fallthrough"
+    #     priority: 0
+
+# ============================================================
+# Standard Router Configuration (existing)
+# ============================================================
+
+# Routing categories (unchanged from the existing router configuration).
+# Each entry carries its reasoning settings and per-model scores.
+categories:
+  - name: "computer science"
+    use_reasoning: true
+    reasoning_effort: "medium"
+    # Model names are quoted because they contain ':'
+    model_scores:
+      "qwen-2.5:3b-instruct": 0.78
+      "tinyllama-1.1b-chat": 0.65
+
+  - name: "math"
+    use_reasoning: true
+    reasoning_effort: "high"
+    model_scores:
+      "qwen-2.5:3b-instruct": 0.85
+      "tinyllama-1.1b-chat": 0.45
+
+  - name: "business"
+    use_reasoning: false
+    reasoning_effort: "low"
+    model_scores:
+      "qwen-2.5:3b-instruct": 0.72
+      "tinyllama-1.1b-chat": 0.68
+
+# Default reasoning effort
+default_reasoning_effort: "medium"
+
+# Default model (the same name appears under each category's model_scores)
+default_model: "qwen-2.5:3b-instruct"
+
+# vLLM endpoints available to the router (a single local endpoint here)
+vllm_endpoints:
+  - name: "qwen-endpoint"
+    # Host, port and health-check path of the endpoint
+    address: "127.0.0.1"
+    port: 8000
+    health_check_path: "/health"
+    weight: 1
+
+# Semantic cache settings
+semantic_cache:
+  enabled: true
+  backend_type: "memory"
+  # Entry-count and TTL settings
+  max_entries: 1000
+  ttl_seconds: 3600
+  # Similarity threshold used by the cache
+  similarity_threshold: 0.8
+
+# Tools settings
+tools:
+  enabled: true
+  # JSON file backing the tools database
+  tools_db_path: "config/tools_db.json"
+  top_k: 3
+  similarity_threshold: 0.2
+  fallback_to_empty: true
+
+# API settings
+api:
+  batch_classification:
+    # Metrics for batch classification
+    metrics:
+      sample_rate: 1.0
+      enabled: true
+
+# Observability settings (tracing is switched off in this example)
+observability:
+  tracing:
+    enabled: false
+    provider: "opentelemetry"
+    # Resource attributes for the tracing configuration
+    resource:
+      service_name: "semantic-router"
+      deployment_environment: "development"
+    sampling:
+      type: "always_on"
+    # Exporter target
+    exporter:
+      type: "otlp"
+      endpoint: "localhost:4317"
+      insecure: true
@@ -0,0 +1,106 @@
+# Keyword Rules Configuration
+# Defines keyword matching rules for deterministic routing
+
+rules:
+  # Infrastructure and DevOps keywords
+  - name: "kubernetes-infrastructure"
+    description: "Kubernetes and container orchestration terms"
+    # "OR": match if ANY keyword in the list is found
+    operator: "OR"
+    case_sensitive: false
+    keywords:
+      - "kubernetes"
+      - "k8s"
+      - "kubectl"
+      - "helm"
+      - "pod"
+      - "deployment"
+      - "service mesh"
+      - "istio"
+      - "kustomize"
+
+  # Docker and containerization keywords
+  - name: "docker"
+    description: "Docker and containerization terms"
+    operator: "OR"
+    case_sensitive: false
+    keywords:
+      - "docker"
+      - "container"
+      - "dockerfile"
+      - "docker-compose"
+      - "docker image"
+      - "docker container"
+
+  # Database and SQL keywords
+  - name: "database"
+    description: "Database and SQL terms"
+    operator: "OR"
+    case_sensitive: false
+    keywords:
+      - "database"
+      - "sql"
+      - "mysql"
+      - "postgresql"
+      - "mongodb"
+      - "redis"
+      - "query optimization"
+      - "index"
+      - "transaction"
+
+  # Security and cybersecurity keywords
+  - name: "security"
+    description: "Security and cybersecurity terms"
+    operator: "OR"
+    case_sensitive: false
+    keywords:
+      - "security"
+      - "vulnerability"
+      - "exploit"
+      - "firewall"
+      - "encryption"
+      - "authentication"
+      - "authorization"
+      - "penetration test"
+
+  # Programming and software development keywords
+  - name: "programming"
+    description: "Programming and software development terms"
+    operator: "OR"
+    case_sensitive: false
+    keywords:
+      - "code"
+      - "function"
+      - "class"
+      - "algorithm"
+      - "debug"
+      - "refactor"
+      - "unit test"
+      - "programming"
+
+  # Mathematics and computation keywords
+  - name: "mathematics"
+    description: "Mathematics and computation terms"
+    operator: "OR"
+    case_sensitive: false
+    keywords:
+      - "calculus"
+      - "derivative"
+      - "integral"
+      - "matrix"
+      - "algebra"
+      - "geometry"
+      - "probability"
+      - "statistics"
+
+  # Performance optimization keywords
+  - name: "performance"
+    description: "Performance optimization terms"
+    operator: "OR"
+    case_sensitive: false
+    keywords:
+      - "performance"
+      - "optimization"
+      - "latency"
+      - "throughput"
+      - "bottleneck"
+      - "profiling"
+      - "benchmark"
@@ -0,0 +1,118 @@
+# Signal Fusion Policy Rules Configuration
+# This file defines the fusion policy rules that combine signals into routing decisions
+
+rules:
+  # ============================================================
+  # PRIORITY 200: Safety Blocks
+  # Highest-priority rules; each one blocks requests that match a
+  # sensitive regex pattern.
+  # NOTE(review): "block-combined-pii" overlaps the two single-pattern
+  # rules at the same priority - confirm how the engine orders
+  # equal-priority matches (it decides which message is returned).
+  # ============================================================
+
+  - name: "block-ssn-pattern"
+    priority: 200
+    action: "block"
+    condition: "regex.ssn.matched"
+    message: "Request contains Social Security Number pattern and cannot be processed for security reasons"
+
+  - name: "block-credit-card"
+    priority: 200
+    action: "block"
+    condition: "regex.credit-card.matched"
+    message: "Request contains credit card number pattern and cannot be processed for security reasons"
+
+  - name: "block-combined-pii"
+    priority: 200
+    action: "block"
+    condition: "regex.ssn.matched || regex.credit-card.matched || regex.email-with-password.matched"
+    message: "PII detected - request blocked for security compliance"
+
+  # ============================================================
+  # PRIORITY 150: High-Confidence Routing
+  # Each rule routes to specialized models when its signals agree;
+  # "models" lists the candidate models for the route.
+  # ============================================================
+
+  - name: "route-kubernetes-expert"
+    priority: 150
+    action: "route"
+    condition: "keyword.kubernetes-infrastructure.matched && similarity.infrastructure.score > 0.75"
+    models: ["k8s-expert", "devops-model"]
+
+  - name: "route-security-expert"
+    priority: 150
+    action: "route"
+    condition: "(keyword.security.matched || regex.cve-id.matched) && bert.category.value == 'computer science'"
+    models: ["security-hardened-model", "cybersecurity-specialist"]
+
+  - name: "route-database-expert"
+    priority: 150
+    action: "route"
+    condition: "keyword.database.matched && similarity.database.score > 0.8"
+    models: ["db-expert", "sql-specialist"]
+
+  - name: "route-docker-expert"
+    priority: 150
+    action: "route"
+    condition: "keyword.docker.matched && (similarity.containers.score > 0.75 || bert.category.value == 'computer science')"
+    models: ["docker-expert", "containerization-specialist"]
+
+  # ============================================================
+  # PRIORITY 100: Category Boosting
+  # Each rule boosts the weight of one BERT category by "boost_weight"
+  # when its condition holds.
+  # NOTE(review): "reasoning" is not among the categories listed in the
+  # example router config ("computer science", "math", "business") -
+  # confirm the category exists in the classifier's mapping.
+  # ============================================================
+
+  - name: "boost-reasoning-category"
+    priority: 100
+    action: "boost_category"
+    condition: "similarity.reasoning.score > 0.75"
+    category: "reasoning"
+    boost_weight: 1.5
+
+  - name: "boost-math-category"
+    priority: 100
+    action: "boost_category"
+    condition: "keyword.mathematics.matched || similarity.mathematics.score > 0.7"
+    category: "math"
+    boost_weight: 1.4
+
+  - name: "boost-code-generation"
+    priority: 100
+    action: "boost_category"
+    condition: "keyword.programming.matched && similarity.code-generation.score > 0.7"
+    category: "computer science"
+    boost_weight: 1.3
+
+  # ============================================================
+  # PRIORITY 175: Multi-Signal Consensus
+  # These rules require multiple independent signals to agree.
+  # They rank above the PRIORITY 150 routes because each condition here
+  # is a narrower case of a 150-level rule ("route-kubernetes-expert",
+  # "route-security-expert", "route-database-expert"). At a lower
+  # priority the broader rule would match first and these specialised
+  # models would not be selected.
+  # NOTE(review): assumes the highest-priority matching rule wins -
+  # confirm against the fusion engine's evaluation order.
+  # ============================================================
+
+  - name: "consensus-k8s-security"
+    condition: "keyword.kubernetes-infrastructure.matched && keyword.security.matched && similarity.infrastructure.score > 0.8 && bert.category.value == 'computer science'"
+    action: "route"
+    priority: 175
+    models:
+      - "k8s-security-expert"
+
+  - name: "consensus-database-performance"
+    condition: "keyword.database.matched && keyword.performance.matched && similarity.database.score > 0.75"
+    action: "route"
+    priority: 175
+    models:
+      - "db-performance-expert"
+
+  # ============================================================
+  # PRIORITY 0: Default Fallthrough
+  # This rule catches all non-blocked requests and uses BERT.
+  # The condition negates every pattern referenced by the PRIORITY 200
+  # block rules (SSN, credit card, email-with-password), so it holds
+  # only for requests that none of those rules would block.
+  # ============================================================
+
+  - name: "default-fallthrough"
+    condition: "!regex.ssn.matched && !regex.credit-card.matched && !regex.email-with-password.matched"
+    action: "fallthrough"
+    priority: 0