-
Notifications
You must be signed in to change notification settings - Fork 1.2k
FIM strategy context improvements #2863
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,10 @@ export class GhostDocumentStore { | |
private documentStore: Map<string, GhostDocumentStoreItem> = new Map() | ||
private parserInitialized: boolean = false | ||
|
||
// Global recent operations across all files | ||
private globalRecentOperations: Array<UserAction & { filepath: string }> = [] | ||
private readonly maxGlobalOperations = 10 | ||
|
||
/** | ||
* Store a document in the document store | ||
* @param document The document to store | ||
|
@@ -46,6 +50,19 @@ export class GhostDocumentStore { | |
item.history.shift() // Remove the oldest snapshot if we exceed the limit | ||
} | ||
|
||
// Analyze and track global operations if we have enough history | ||
if (item.history.length >= 2) { | ||
const oldContent = item.history[item.history.length - 2] | ||
const newContent = item.history[item.history.length - 1] | ||
const filePath = vscode.workspace.asRelativePath(document.uri) | ||
const operations = this.analyzeDocumentChanges(oldContent, newContent, filePath) | ||
|
||
// Add to global operations with filepath | ||
for (const op of operations) { | ||
this.addGlobalOperation(op, uri) | ||
} | ||
} | ||
|
||
// Once executed, remove the timer from the map. | ||
this.debounceTimers.delete(uri) | ||
} | ||
|
@@ -458,4 +475,40 @@ export class GhostDocumentStore { | |
|
||
return [] | ||
} | ||
|
||
/**
 * Record an operation at the front of the cross-file recent-operations list.
 * The list is capped at `maxGlobalOperations`; the oldest entries fall off the end.
 * @param operation The user action to record
 * @param filepath Identifier of the file the action happened in (stored as given)
 */
private addGlobalOperation(operation: UserAction, filepath: string): void {
	// TODO(review): a reviewer asked that this respect kilocodeignore/gitignore (see #2852).
	const entry = { ...operation, filepath }
	const updated = [entry, ...this.globalRecentOperations]

	// Newest entries sit at the front, so trimming from the tail drops the oldest ones.
	this.globalRecentOperations =
		updated.length > this.maxGlobalOperations ? updated.slice(0, this.maxGlobalOperations) : updated
}
|
||
/**
 * Return a copy of the recent operations recorded across all files.
 * @param excludeFilepath When provided and non-empty, operations whose filepath equals it are omitted
 * @returns A new array of operations, each tagged with its source filepath (the entries themselves are shared)
 */
public getGlobalRecentOperations(excludeFilepath?: string): Array<UserAction & { filepath: string }> {
	if (!excludeFilepath) {
		return this.globalRecentOperations.slice()
	}
	return this.globalRecentOperations.filter(({ filepath }) => filepath !== excludeFilepath)
}
|
||
/**
 * Forget every recorded cross-file operation by swapping in a fresh, empty list.
 */
public clearGlobalRecentOperations(): void {
	this.globalRecentOperations = new Array<UserAction & { filepath: string }>()
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
/** | ||
* Context ranking utilities for autocomplete | ||
* Based on Continue's ranking approach using Jaccard similarity | ||
*/ | ||
|
||
/**
 * A code snippet paired with the relevance score assigned to it by ranking.
 */
export interface RankedSnippet {
	/** Text of the snippet. */
	content: string
	/** File the snippet came from. */
	filepath: string
	/** Similarity score; rankSnippets fills this with a Jaccard similarity in [0, 1]. */
	score: number
}
|
||
const SYMBOL_REGEX = /[\s.,\/#!$%\^&\*;:{}=\-_`~()\[\]]/g

/**
 * Break a code snippet into its distinct symbols.
 *
 * The snippet is cut at whitespace and at every punctuation character listed in
 * SYMBOL_REGEX. Empty fragments are discarded and repeated symbols collapse into
 * a single Set entry.
 */
export function getSymbolsForSnippet(snippet: string): Set<string> {
	const uniqueSymbols = new Set<string>()
	for (const fragment of snippet.split(SYMBOL_REGEX)) {
		const token = fragment.trim()
		if (token !== "") {
			uniqueSymbols.add(token)
		}
	}
	return uniqueSymbols
}
|
||
/**
 * Calculate the Jaccard similarity between the symbol sets of two strings.
 *
 * Formula: |A ∩ B| / |A ∪ B|, where A and B are the sets returned by
 * getSymbolsForSnippet for each input.
 * - 0 means the inputs share no symbols (or both contain no symbols at all)
 * - 1 means the symbol sets are identical
 *
 * The union size is derived as |A| + |B| - |A ∩ B|, so no merged set is built.
 * Both inputs are still tokenized in full on every call.
 *
 * @param a - First text to compare
 * @param b - Second text to compare
 * @returns A value between 0 and 1
 */
export function jaccardSimilarity(a: string, b: string): number {
	const aSet = getSymbolsForSnippet(a)
	const bSet = getSymbolsForSnippet(b)

	// Walk the smaller set and probe the larger one: same count, fewer lookups.
	const [smaller, larger] = aSet.size <= bSet.size ? [aSet, bSet] : [bSet, aSet]

	let intersection = 0
	for (const symbol of smaller) {
		if (larger.has(symbol)) {
			intersection++
		}
	}

	const union = aSet.size + bSet.size - intersection

	// Both inputs were empty of symbols: avoid dividing by zero.
	if (union === 0) {
		return 0
	}

	return intersection / union
}
|
||
/**
 * Score each snippet against the text surrounding the cursor and order them by relevance.
 *
 * @param snippets - Candidate snippets; the input array is not modified
 * @param windowAroundCursor - Text around the cursor that every snippet is compared with
 * @returns A new array of the snippets with a `score` attached, highest score first
 */
export function rankSnippets(
	snippets: Array<{ content: string; filepath: string }>,
	windowAroundCursor: string,
): RankedSnippet[] {
	const scored: RankedSnippet[] = []
	for (const candidate of snippets) {
		scored.push({
			...candidate,
			score: jaccardSimilarity(candidate.content, windowAroundCursor),
		})
	}

	// Descending order: the most similar snippet ends up at index 0.
	scored.sort((left, right) => right.score - left.score)
	return scored
}
|
||
/**
 * Keep at most one snippet per file: the one with the highest score.
 *
 * No content is merged. When several snippets of the same file tie for the best
 * score, the one that appears first in the input wins. Results are ordered by
 * the first appearance of each file in the input, and the input is not modified.
 *
 * NOTE(review): a reviewer pointed out that this function is not yet called anywhere.
 *
 * @param snippets - Scored snippets, in any order
 * @returns One snippet per distinct filepath
 */
export function deduplicateSnippets(snippets: RankedSnippet[]): RankedSnippet[] {
	// A Map (not a plain object) so that filepaths such as "toString" or "__proto__"
	// cannot collide with inherited properties, and insertion order is always kept.
	const bestByFile = new Map<string, RankedSnippet>()

	for (const snippet of snippets) {
		const currentBest = bestByFile.get(snippet.filepath)
		// Strictly greater: an equal score never displaces the earlier snippet.
		if (currentBest === undefined || snippet.score > currentBest.score) {
			bestByFile.set(snippet.filepath, snippet)
		}
	}

	return Array.from(bestByFile.values())
}
|
||
/**
 * Select the snippets that fit inside a token budget.
 *
 * Snippets are visited in the given order. A snippet is kept when its estimated
 * cost still fits in what is left of the budget; one that does not fit is skipped
 * and later, smaller snippets are still considered.
 *
 * @param snippets - Ranked snippets (expected to be sorted best-first)
 * @param maxTokens - Total token budget available
 * @param estimateTokens - Callback returning the estimated token count of a string
 * @returns The kept snippets, in their original relative order
 */
export function fillPromptWithSnippets(
	snippets: RankedSnippet[],
	maxTokens: number,
	// TODO(review): a reviewer suggested the budget should also account for the current file.
	estimateTokens: (text: string) => number,
): RankedSnippet[] {
	const selected: RankedSnippet[] = []
	let budget = maxTokens

	snippets.forEach((candidate) => {
		const leftover = budget - estimateTokens(candidate.content)
		if (leftover >= 0) {
			budget = leftover
			selected.push(candidate)
		}
	})

	return selected
}
|
||
/**
 * Rough token estimate: one token per four characters, rounded up.
 * Swap in a real tokenizer when accuracy matters.
 */
export function estimateTokenCount(text: string): number {
	const charsPerToken = 4
	const approximateTokens = text.length / charsPerToken
	return Math.ceil(approximateTokens)
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import { GhostSuggestionContext } from "../types" | ||
import { PromptStrategy, UseCaseType } from "../types/PromptStrategy" | ||
import { CURSOR_MARKER } from "../ghostConstants" | ||
import { rankSnippets } from "../context/ContextRanking" | ||
import { getBaseSystemInstructions } from "./StrategyHelpers" | ||
|
||
/** | ||
|
@@ -54,17 +55,110 @@ Generate code to fill in at the cursor position. The code should: | |
if (!context.document || !context.range) { | ||
return "No context available for completion." | ||
} | ||
|
||
// Get recent operations for additional context (from existing system) | ||
const recentOpsContext = this.getRecentOperationsContext(context) | ||
|
||
const document = context.document | ||
const position = context.range.start | ||
|
||
// FIXME: use addCursorMarker from StrategyHelpers.ts | ||
// FIXME: use addCursorMarker from StrategyHelpers.ts | ||
// Get the code before and after the cursor | ||
const fullText = document.getText() | ||
const offset = document.offsetAt(position) | ||
const textBeforeCursor = fullText.substring(0, offset) | ||
const textAfterCursor = fullText.substring(offset) | ||
|
||
return `[SUFFIX]${textAfterCursor}[PREFIX]${textBeforeCursor}${CURSOR_MARKER}` | ||
return `[SUFFIX]${textAfterCursor}[PREFIX]${recentOpsContext}${textBeforeCursor}${CURSOR_MARKER}` | ||
} | ||
|
||
/**
 * Build a context string from recently recorded operations.
 *
 * Operations from the current file (`context.recentOperations`) and from other
 * files (`context.globalRecentOperations`) that carry content are ranked with
 * rankSnippets against the text a few lines around the cursor. The top three
 * are rendered as a `// Recent…` comment line followed by the operation content.
 *
 * @param context The suggestion context holding the document, cursor range and operations
 * @returns The formatted context ending in a blank line, or "" when there is
 *          no document/range or no operation with content
 */
private getRecentOperationsContext(context: GhostSuggestionContext): string {
	const { document, range } = context
	if (!document || !range) {
		return ""
	}

	// Size of the comparison window, in lines on each side of the cursor.
	const linesAroundCursor = 5
	// Number of top-ranked operations included in the prompt.
	const maxOperations = 3

	const position = range.start

	// The Position/Range classes are taken from the live instances.
	// NOTE(review): presumably to avoid a direct vscode import in this file — confirm.
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const PositionCtor = position.constructor as any
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const RangeCtor = range.constructor as any

	const windowStart = new PositionCtor(Math.max(0, position.line - linesAroundCursor), 0)
	const windowEnd = new PositionCtor(Math.min(position.line + linesAroundCursor, document.lineCount), 0)
	const textBeforeCursor = document.getText(new RangeCtor(windowStart, position))
	const textAfterCursor = document.getText(new RangeCtor(position, windowEnd))
	const windowAroundCursor = textBeforeCursor + textAfterCursor

	type Operation = { content: string; filepath: string; description: string; isGlobal: boolean }
	const allOperations: Operation[] = []

	// Operations recorded for the current file.
	for (const op of context.recentOperations || []) {
		if (op.content) {
			allOperations.push({
				content: op.content,
				filepath: document.uri.toString(),
				description: op.description,
				isGlobal: false,
			})
		}
	}

	// Operations recorded for other files.
	for (const op of context.globalRecentOperations || []) {
		if (op.content) {
			allOperations.push({
				content: op.content,
				filepath: op.filepath,
				description: op.description,
				isGlobal: true,
			})
		}
	}

	if (allOperations.length === 0) {
		return ""
	}

	const topRanked = rankSnippets(
		allOperations.map(({ content, filepath }) => ({ content, filepath })),
		windowAroundCursor,
	).slice(0, maxOperations)

	// Queue operations per (filepath, content) so that duplicates are handed out one
	// by one, in input order, instead of every duplicate resolving to the first match.
	// This relies on rankSnippets giving identical inputs identical scores and on the
	// sort being stable.
	const keyOf = (snippet: { content: string; filepath: string }): string =>
		`${snippet.filepath}\u0000${snippet.content}`
	const pendingByKey = new Map<string, Operation[]>()
	for (const op of allOperations) {
		const key = keyOf(op)
		const queue = pendingByKey.get(key)
		if (queue) {
			queue.push(op)
		} else {
			pendingByKey.set(key, [op])
		}
	}

	const contextParts: string[] = []
	for (const ranked of topRanked) {
		const queue = pendingByKey.get(keyOf(ranked))
		const op = queue ? queue.shift() : undefined
		if (!op) {
			continue
		}

		const relevance = (ranked.score * 100).toFixed(0)
		if (op.isGlobal) {
			// Show only the last path segment for operations from other files.
			const filename = op.filepath.split("/").pop() || op.filepath
			contextParts.push(
				`// Recent in ${filename}: ${op.description} (relevance: ${relevance}%)\n${ranked.content}`,
			)
		} else {
			contextParts.push(`// Recent: ${op.description} (relevance: ${relevance}%)\n${ranked.content}`)
		}
	}

	// Only add the trailing blank line when something was actually rendered.
	return contextParts.length > 0 ? `${contextParts.join("\n\n")}\n\n` : ""
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why?