Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/services/ghost/GhostContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,18 @@ export class GhostContext {
if (!context.document) {
return context
}
// Add operations from current document
const recentOperations = this.documentStore.getRecentOperations(context.document)
if (recentOperations) {
context.recentOperations = recentOperations
}

// Add global operations from all files (excluding current document)
const globalOperations = this.documentStore.getGlobalRecentOperations(context.document.uri.toString())
if (globalOperations && globalOperations.length > 0) {
context.globalRecentOperations = globalOperations
}

return context
}

Expand Down
53 changes: 53 additions & 0 deletions src/services/ghost/GhostDocumentStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ export class GhostDocumentStore {
private documentStore: Map<string, GhostDocumentStoreItem> = new Map()
private parserInitialized: boolean = false

// Global recent operations across all files
private globalRecentOperations: Array<UserAction & { filepath: string }> = []
private readonly maxGlobalOperations = 10

/**
* Store a document in the document store
* @param document The document to store
Expand Down Expand Up @@ -46,6 +50,19 @@ export class GhostDocumentStore {
item.history.shift() // Remove the oldest snapshot if we exceed the limit
}

// Analyze and track global operations if we have enough history
if (item.history.length >= 2) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

const oldContent = item.history[item.history.length - 2]
const newContent = item.history[item.history.length - 1]
const filePath = vscode.workspace.asRelativePath(document.uri)
const operations = this.analyzeDocumentChanges(oldContent, newContent, filePath)

// Add to global operations with filepath
for (const op of operations) {
this.addGlobalOperation(op, uri)
}
}

// Once executed, remove the timer from the map.
this.debounceTimers.delete(uri)
}
Expand Down Expand Up @@ -458,4 +475,40 @@ export class GhostDocumentStore {

return []
}

/**
 * Record an operation at the front of the cross-file recent-operations list.
 *
 * The list is kept newest-first and capped at `maxGlobalOperations` entries; the
 * oldest entries are dropped once the cap is exceeded.
 *
 * @param operation The operation to record
 * @param filepath Identifier of the file where the operation occurred; `getGlobalRecentOperations`
 * compares it by exact string equality when excluding a file
 */
private addGlobalOperation(operation: UserAction, filepath: string): void {
	// TODO(review): operations from files excluded by kilocodeignore/gitignore rules must not be
	// recorded here (see #2852).
	this.globalRecentOperations.unshift({ ...operation, filepath })

	// Trim the tail in place so only the most recent operations remain.
	if (this.globalRecentOperations.length > this.maxGlobalOperations) {
		this.globalRecentOperations.splice(this.maxGlobalOperations)
	}
}

/**
 * Return the recorded cross-file operations.
 *
 * @param excludeFilepath When given (and non-empty), operations whose `filepath` equals this value are omitted
 * @returns A new array on every call, so callers never receive the store's internal list
 */
public getGlobalRecentOperations(excludeFilepath?: string): Array<UserAction & { filepath: string }> {
	const tracked = this.globalRecentOperations
	return excludeFilepath ? tracked.filter((entry) => entry.filepath !== excludeFilepath) : tracked.slice()
}

/**
 * Forget every recorded cross-file operation by swapping in a fresh, empty list.
 */
public clearGlobalRecentOperations(): void {
	const emptied: Array<UserAction & { filepath: string }> = []
	this.globalRecentOperations = emptied
}
}
134 changes: 134 additions & 0 deletions src/services/ghost/context/ContextRanking.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/**
* Context ranking utilities for autocomplete
* Based on Continue's ranking approach using Jaccard similarity
*/

/** A snippet paired with the relevance score assigned to it during ranking. */
export interface RankedSnippet {
/** Text of the snippet that was scored. */
content: string
/** Identifier of the file the snippet belongs to. */
filepath: string
/** Relevance score; `rankSnippets` fills this with a Jaccard similarity, so higher means more relevant. */
score: number
}

/** Characters treated as separators between symbols: whitespace and common punctuation (including `_` and `-`). */
const SYMBOL_REGEX = /[\s.,\/#!$%\^&\*;:{}=\-_`~()\[\]]/g

/**
 * Break a code snippet into its distinct symbols.
 *
 * The snippet is cut at every separator character; empty pieces are discarded
 * and duplicates collapse because the result is a Set.
 */
export function getSymbolsForSnippet(snippet: string): Set<string> {
	const unique = new Set<string>()
	for (const piece of snippet.split(SYMBOL_REGEX)) {
		const token = piece.trim()
		if (token !== "") {
			unique.add(token)
		}
	}
	return unique
}

/**
 * Calculate the Jaccard similarity between the symbol sets of two strings.
 *
 * Formula: |A ∩ B| / |A ∪ B|, where A and B are the sets returned by
 * `getSymbolsForSnippet` for each string.
 *
 * The union size is derived as |A| + |B| - |A ∩ B|, so no merged set or
 * intermediate array is built; only the smaller set is iterated. Both inputs
 * are still tokenized in full on every call.
 *
 * @param a - First string
 * @param b - Second string
 * @returns A value between 0 and 1: 0 when the strings share no symbols (or
 * both have none), 1 when their symbol sets are identical
 */
export function jaccardSimilarity(a: string, b: string): number {
	const aSet = getSymbolsForSnippet(a)
	const bSet = getSymbolsForSnippet(b)

	// Both sets empty means an empty union; avoid dividing by zero.
	if (aSet.size === 0 && bSet.size === 0) {
		return 0
	}

	// Probe the larger set with members of the smaller one to minimise lookups.
	const [smaller, larger] = aSet.size <= bSet.size ? [aSet, bSet] : [bSet, aSet]
	let intersection = 0
	for (const symbol of smaller) {
		if (larger.has(symbol)) {
			intersection++
		}
	}

	const union = aSet.size + bSet.size - intersection
	return intersection / union
}

/**
 * Score every snippet against the code surrounding the cursor and order them
 * from most to least similar.
 *
 * @param snippets - Candidate snippets; the input array is not modified
 * @param windowAroundCursor - Code context around the cursor position
 * @returns A new array of the snippets with their Jaccard `score`, highest score first
 */
export function rankSnippets(
	snippets: Array<{ content: string; filepath: string }>,
	windowAroundCursor: string,
): RankedSnippet[] {
	const scored: RankedSnippet[] = []
	for (const candidate of snippets) {
		scored.push({
			...candidate,
			score: jaccardSimilarity(candidate.content, windowAroundCursor),
		})
	}

	// Descending order: the best match ends up at index 0.
	scored.sort((left, right) => right.score - left.score)
	return scored
}

/**
 * Deduplicate snippets per file by keeping only the highest-scored snippet of
 * each `filepath`. Snippet contents are not merged.
 *
 * A `Map` is used for grouping so that any string is a safe key (a plain object
 * would collide with inherited names such as "constructor") and so that files
 * appear in the result in the order they were first seen.
 *
 * NOTE(review): a reviewer pointed out that this helper and the ones after it
 * are not called yet, and that limiting the amount of context matters for
 * small models.
 *
 * @param snippets - Snippets to deduplicate; the array is not modified
 * @returns One snippet per distinct filepath; on equal scores the earliest snippet wins
 */
export function deduplicateSnippets(snippets: RankedSnippet[]): RankedSnippet[] {
	const bestByFile = new Map<string, RankedSnippet>()

	for (const snippet of snippets) {
		const currentBest = bestByFile.get(snippet.filepath)
		// Strict comparison keeps the first snippet seen when scores tie.
		if (currentBest === undefined || snippet.score > currentBest.score) {
			bestByFile.set(snippet.filepath, snippet)
		}
	}

	return Array.from(bestByFile.values())
}

/**
 * Select the snippets that fit within a token budget.
 *
 * Snippets are visited in the given order. A snippet that does not fit is
 * skipped and later, smaller snippets may still be kept. Negative estimates
 * are counted as zero so a misbehaving estimator cannot enlarge the budget;
 * a NaN estimate never fits.
 *
 * TODO(review): `maxTokens` should probably account for the size of the
 * current file — the caller needs to derive the budget from it.
 *
 * @param snippets - Ranked snippets (should be sorted by score)
 * @param maxTokens - Maximum number of tokens to use
 * @param estimateTokens - Function to estimate token count for a string
 * @returns The snippets that fit within the budget, in their original order
 */
export function fillPromptWithSnippets(
	snippets: RankedSnippet[],
	maxTokens: number,
	estimateTokens: (text: string) => number,
): RankedSnippet[] {
	let tokensRemaining = maxTokens
	const keptSnippets: RankedSnippet[] = []

	for (const snippet of snippets) {
		// Clamp so a negative estimate cannot hand tokens back to the budget.
		const cost = Math.max(0, estimateTokens(snippet.content))
		if (cost <= tokensRemaining) {
			tokensRemaining -= cost
			keptSnippets.push(snippet)
		}
	}

	return keptSnippets
}

/**
 * Rough token estimate that assumes about four characters per token, rounded up.
 * Use a real tokenizer when an accurate count is needed.
 */
export function estimateTokenCount(text: string): number {
	const charsPerToken = 4
	const approximate = text.length / charsPerToken
	return Math.ceil(approximate)
}
98 changes: 96 additions & 2 deletions src/services/ghost/strategies/FimCodestralStrategy.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { GhostSuggestionContext } from "../types"
import { PromptStrategy, UseCaseType } from "../types/PromptStrategy"
import { CURSOR_MARKER } from "../ghostConstants"
import { rankSnippets } from "../context/ContextRanking"
import { getBaseSystemInstructions } from "./StrategyHelpers"

/**
Expand Down Expand Up @@ -54,17 +55,110 @@ Generate code to fill in at the cursor position. The code should:
if (!context.document || !context.range) {
return "No context available for completion."
}

// Get recent operations for additional context (from existing system)
const recentOpsContext = this.getRecentOperationsContext(context)

const document = context.document
const position = context.range.start

// FIXME: use addCursorMarker from StrategyHelpers.ts
// FIXME: use addCursorMarker from StrategyHelpers.ts
// Get the code before and after the cursor
const fullText = document.getText()
const offset = document.offsetAt(position)
const textBeforeCursor = fullText.substring(0, offset)
const textAfterCursor = fullText.substring(offset)

return `[SUFFIX]${textAfterCursor}[PREFIX]${textBeforeCursor}${CURSOR_MARKER}`
return `[SUFFIX]${textAfterCursor}[PREFIX]${recentOpsContext}${textBeforeCursor}${CURSOR_MARKER}`
}

/**
 * Build the recent-operations preamble that is placed in front of the code before the cursor.
 *
 * Operations from the current document and from other files are ranked by
 * Jaccard similarity against the text a few lines around the cursor; the three
 * best matches are rendered as a comment header followed by the operation content.
 *
 * @param context Suggestion context providing the document, the cursor range and the recorded operations
 * @returns The formatted block terminated by a blank line, or "" when there is no document/range
 * or no operation with content
 */
private getRecentOperationsContext(context: GhostSuggestionContext): string {
	if (!context.document || !context.range) {
		return ""
	}

	const document = context.document
	const position = context.range.start
	// Number of lines taken on each side of the cursor for the similarity comparison.
	const cursorContextLines = 5

	// Position/Range instances are created through the constructors of the objects already at hand.
	const PositionCtor = position.constructor as any
	const RangeCtor = context.range.constructor as any

	const textBeforeCursor = document.getText(
		new RangeCtor(new PositionCtor(Math.max(0, position.line - cursorContextLines), 0), position),
	)
	const textAfterCursor = document.getText(
		new RangeCtor(
			position,
			new PositionCtor(Math.min(position.line + cursorContextLines, document.lineCount), 0),
		),
	)
	const textAroundCursor = textBeforeCursor + textAfterCursor

	type OperationEntry = { content: string; filepath: string; description: string; isGlobal: boolean }
	const allOperations: OperationEntry[] = []

	// Operations recorded for the current document; entries without content cannot be ranked.
	const currentFile = document.uri.toString()
	for (const op of context.recentOperations || []) {
		if (op.content) {
			allOperations.push({
				content: op.content,
				filepath: currentFile,
				description: op.description,
				isGlobal: false,
			})
		}
	}

	// Operations recorded for other files.
	for (const op of context.globalRecentOperations || []) {
		if (op.content) {
			allOperations.push({
				content: op.content,
				filepath: op.filepath,
				description: op.description,
				isGlobal: true,
			})
		}
	}

	if (allOperations.length === 0) {
		return ""
	}

	// Rank by similarity to the code around the cursor and keep the three best matches.
	const topOperations = rankSnippets(
		allOperations.map((op) => ({ content: op.content, filepath: op.filepath })),
		textAroundCursor,
	).slice(0, 3)

	const contextParts = topOperations
		.map((ranked) => {
			// Ranking returns only content/filepath/score; look the entry up again for its description.
			const op = allOperations.find((o) => o.content === ranked.content && o.filepath === ranked.filepath)
			if (!op) return ""

			const relevance = (ranked.score * 100).toFixed(0)
			if (op.isGlobal) {
				const filename = op.filepath.split("/").pop() || op.filepath
				return `// Recent in ${filename}: ${op.description} (relevance: ${relevance}%)\n${ranked.content}`
			}
			return `// Recent: ${op.description} (relevance: ${relevance}%)\n${ranked.content}`
		})
		.filter((part) => part !== "")

	return contextParts.length > 0 ? `${contextParts.join("\n\n")}\n\n` : ""
}
}
3 changes: 2 additions & 1 deletion src/services/ghost/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export interface GhostSuggestionContext {
openFiles?: vscode.TextDocument[]
range?: vscode.Range | vscode.Selection
userInput?: string
recentOperations?: UserAction[] // Stores meaningful user actions instead of raw diff
recentOperations?: UserAction[] // Stores meaningful user actions from current document
globalRecentOperations?: Array<UserAction & { filepath: string }> // Recent operations from all workspace files
diagnostics?: vscode.Diagnostic[] // Document diagnostics (errors, warnings, etc.)
}