Skip to content

Commit

Permalink
merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
RomneyDa committed Nov 26, 2024
2 parents 346796c + 56b5240 commit 0743780
Show file tree
Hide file tree
Showing 19 changed files with 372 additions and 1,076 deletions.
3 changes: 3 additions & 0 deletions .changes/extensions/vscode/0.8.59.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 0.8.59 - 2024-11-25
### Fixed
* Hotfix for Ollama onboarding
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ import { streamLines } from "../../../diff/util";
import { DEFAULT_AUTOCOMPLETE_OPTS } from "../../../util/parameters";
import { HelperVars } from "../../util/HelperVars";

import { stopAtStopTokens } from "./charStream";
import { stopAtStartOf, stopAtStopTokens } from "./charStream";
import {
avoidEmptyComments,
avoidPathLine,
noDoubleNewlineAfterClosingBracket,
showWhateverWeHaveAtXMs,
skipPrefixes,
stopAtLines,
stopAtRepeatingLines,
stopAtSimilarLine,
stopNCharsAfterClosingBracket,
streamWithNewLines,
} from "./lineStream";

Expand All @@ -28,6 +28,7 @@ export class StreamTransformPipeline {
let charGenerator = generator;

charGenerator = stopAtStopTokens(generator, stopTokens);
charGenerator = stopAtStartOf(charGenerator, suffix);
for (const charFilter of helper.lang.charFilters ?? []) {
charGenerator = charFilter({
chars: charGenerator,
Expand All @@ -48,7 +49,7 @@ export class StreamTransformPipeline {
);
lineGenerator = avoidPathLine(lineGenerator, helper.lang.singleLineComment);
lineGenerator = skipPrefixes(lineGenerator);
lineGenerator = stopNCharsAfterClosingBracket(lineGenerator);
lineGenerator = noDoubleNewlineAfterClosingBracket(lineGenerator);

for (const lineFilter of helper.lang.lineFilters ?? []) {
lineGenerator = lineFilter({ lines: lineGenerator, fullStop });
Expand Down
134 changes: 87 additions & 47 deletions core/autocomplete/filtering/streamTransforms/charStream.test.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
import { stopAtStopTokens } from "./charStream";
import { stopAtStartOf, stopAtStopTokens } from "./charStream";

describe("stopAtStopTokens", () => {
async function* createMockStream(chunks: string[]): AsyncGenerator<string> {
for (const chunk of chunks) {
yield chunk;
}
/**
 * Test helper: exposes a fixed list of chunks as an async stream, emitting
 * them one by one in the order given.
 */
async function* createMockStream(chunks: string[]): AsyncGenerator<string> {
  let position = 0;
  while (position < chunks.length) {
    yield chunks[position];
    position += 1;
  }
}

/**
 * Test helper: drains an async string stream and returns all of its chunks
 * joined together in arrival order.
 */
async function streamToString(stream: AsyncGenerator<string>): Promise<string> {
  const pieces: string[] = [];
  for await (const piece of stream) {
    pieces.push(piece);
  }
  return pieces.join("");
}

describe("stopAtStopTokens", () => {
it("should yield characters until a stop token is encountered", async () => {
const mockStream = createMockStream(["Hello", " world", "! Stop", "here"]);
const stopTokens = ["Stop"];
Expand All @@ -30,12 +38,7 @@ describe("stopAtStopTokens", () => {
const stopTokens = ["END", "STOP", "HALT"];
const result = stopAtStopTokens(mockStream, stopTokens);

const output = [];
for await (const char of result) {
output.push(char);
}

expect(output.join("")).toBe("This is a test. ");
expect(await streamToString(result)).toBe("This is a test. ");
});

it("should handle stop tokens split across chunks", async () => {
Expand Down Expand Up @@ -67,25 +70,15 @@ describe("stopAtStopTokens", () => {
const stopTokens = ["END"];
const result = stopAtStopTokens(mockStream, stopTokens);

const output = [];
for await (const char of result) {
output.push(char);
}

expect(output.join("")).toBe("This is a complete stream");
expect(await streamToString(result)).toBe("This is a complete stream");
});

it("should handle empty chunks", async () => {
const mockStream = createMockStream(["Hello", "", " world", "", "! STOP"]);
const stopTokens = ["STOP"];
const result = stopAtStopTokens(mockStream, stopTokens);

const output = [];
for await (const char of result) {
output.push(char);
}

expect(output.join("")).toBe("Hello world! ");
expect(await streamToString(result)).toBe("Hello world! ");
});

it("should handle stop token at the beginning of the stream", async () => {
Expand All @@ -106,12 +99,7 @@ describe("stopAtStopTokens", () => {
const stopTokens = ["STOP"];
const result = stopAtStopTokens(mockStream, stopTokens);

const output = [];
for await (const char of result) {
output.push(char);
}

expect(output.join("")).toBe("Hello world");
expect(await streamToString(result)).toBe("Hello world");
});

it("should handle multiple stop tokens of different lengths", async () => {
Expand All @@ -124,25 +112,15 @@ describe("stopAtStopTokens", () => {
const stopTokens = ["STOP", "END", "HALT"];
const result = stopAtStopTokens(mockStream, stopTokens);

const output = [];
for await (const char of result) {
output.push(char);
}

expect(output.join("")).toBe("This is a test with multiple ");
expect(await streamToString(result)).toBe("This is a test with multiple ");
});

it("should handle an empty stream", async () => {
const mockStream = createMockStream([]);
const stopTokens = ["STOP"];
const result = stopAtStopTokens(mockStream, stopTokens);

const output = [];
for await (const char of result) {
output.push(char);
}

expect(output.join("")).toBe("");
expect(await streamToString(result)).toBe("");
});

it("should handle an empty stop tokens array", async () => {
Expand All @@ -166,11 +144,73 @@ describe("stopAtStopTokens", () => {
];
const result = stopAtStopTokens(mockStream, stopTokens);

const output = [];
for await (const char of result) {
output.push(char);
}
expect(await streamToString(result)).toBe("Hello world!");
});
});

expect(output.join("")).toBe("Hello world!");
// Test suite for stopAtStartOf: every case feeds it a response that consists of
// some newly generated text followed by an echo of the suffix, and expects only
// the newly generated text to come out.
describe("stopAtStartOf", () => {
// Fixture: a code excerpt that the tests below slice at every offset to derive
// (removed, suffix) pairs.
const sampleCode = ` {
method: "GET",
headers: {
"Content-Type": "application/json",
Authorization: \`Bearer \${this.workOsAccessToken}\`,
},
},
);
const data = await response.json();
return data.items;
}
async getContextItems(
query: string,
extras: ContextProviderExtras,
): Promise<ContextItem[]> {
const response = await extras.fetch(
new URL(
\`/proxy/context/\${this.options.id}/retrieve\`,
controlPlaneEnv.CONTROL_PLANE_URL,
),
`;
/* Some LLMs, such as Codestral, repeat the suffix of the query. To test our filtering, we cut the sample code at every position in turn, remove a part of the input,
and construct a response containing the removed part followed by the suffix. The goal of stopAtStartOf() is to detect the start of the suffix in the response. */
it("should stop if the start of the suffix is reached", async () => {
// Number of characters cut out at each position; this is the text the simulated model has to generate.
const removeLength = 10;
// The upper bound keeps every suffix longer than 20 characters
// (presumably chosen to match stopAtStartOf's default sequenceLength — confirm if that default changes).
for (let i = 0; i < sampleCode.length - removeLength - 20; i++) {
const removed = sampleCode.slice(i, i + removeLength);
const suffix = sampleCode.slice(i + removeLength);
// Simulated model output: the missing text, then an echo of the suffix.
const response = removed + suffix;

// Split at every position that is not followed by a space, so spaces stay attached to the preceding chunk.
const mockStream = createMockStream(response.split(/(?! )/g));
const result = stopAtStartOf(mockStream, suffix);

// Only the removed text may come through; on mismatch, fail with the cut position and all three strings.
const resultStr = await streamToString(result);
if (resultStr !== removed) {
throw new Error(
`i=${i} result:\n${resultStr}\n\nremoved:\n${removed}\n\nsuffix:\n${suffix}`,
);
}
}
});
it("should stop if the start of the suffix is reached, even if the suffix has a prefix", async () => {
// Same procedure as the previous test, except that the suffix handed to stopAtStartOf
// starts with extra text that never appears in the response.
const removeLength = 10;
for (let i = 0; i < sampleCode.length - removeLength - 20; i++) {
const removed = sampleCode.slice(i, i + removeLength);
let suffix = sampleCode.slice(i + removeLength);
// The response is built from the un-prefixed suffix.
const response = removed + suffix;
// Prepend unrelated text, so the echoed part begins partway into the suffix given to stopAtStartOf.
suffix = "strange words;\n which start the suffix#" + suffix;

// Split at every position that is not followed by a space, so spaces stay attached to the preceding chunk.
const mockStream = createMockStream(response.split(/(?! )/g));
const result = stopAtStartOf(mockStream, suffix);

// Only the removed text may come through; on mismatch, fail with the cut position and all three strings.
const resultStr = await streamToString(result);
if (resultStr !== removed) {
throw new Error(
`i=${i} result:\n${resultStr}\n\nremoved:\n${removed}\n\nsuffix:\n${suffix}`,
);
}
}
});
});
54 changes: 54 additions & 0 deletions core/autocomplete/filtering/streamTransforms/charStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,57 @@ export async function* stopAtStopTokens(
yield char;
}
}

/**
 * Forwards characters from `stream` until the stream starts repeating the
 * beginning of `suffix`, then stops.
 *
 * Incoming characters are compared against the first `3 * sequenceLength`
 * characters of `suffix` using an incremental longest-common-substring table.
 * As soon as a run of more than `sequenceLength` consecutive characters
 * matches, the generator returns. The most recent `sequenceLength` characters
 * are always held back, so none of the characters belonging to the detected
 * run are emitted.
 *
 * When no run of that length can exist (the inspected part of `suffix` is not
 * longer than `sequenceLength`, or `sequenceLength` is not positive), chunks
 * are forwarded unchanged and without delay.
 *
 * @param stream - Source of generated text, in chunks of any size.
 * @param suffix - Text whose beginning should end the stream when it shows up.
 * @param sequenceLength - Run length that must be exceeded to count as a
 *   repetition; also the number of characters held back while streaming.
 * @returns The input text up to, but excluding, the detected repetition.
 *   While detection is active, characters are emitted one at a time, followed
 *   by the held-back remainder as a final chunk if it is non-empty.
 */
export async function* stopAtStartOf(
  stream: AsyncGenerator<string>,
  suffix: string,
  sequenceLength: number = 20,
): AsyncGenerator<string> {
  // Only the start of the suffix is inspected.
  const windowLength = Math.min(suffix.length, 3 * sequenceLength);

  // A run longer than sequenceLength cannot fit in the window: nothing to
  // detect, so skip the buffering entirely. This also covers non-positive
  // sequenceLength values, which would otherwise produce an invalid table size.
  if (windowLength <= sequenceLength) {
    for await (const chunk of stream) {
      yield chunk;
    }
    return;
  }

  // runLengths[j] = length of the common run that ends at the most recently
  // read character and at suffix[j - 1]. Index 0 is a permanent zero sentinel.
  const runLengths: number[] = new Array<number>(windowLength + 1).fill(0);
  // Tail of the input that has been read but not yet emitted.
  let held = "";

  for await (const chunk of stream) {
    // Indexed by UTF-16 code unit so the comparison lines up with suffix indexing.
    for (let i = 0; i < chunk.length; i++) {
      const char = chunk.charAt(i);

      // Walk right-to-left so runLengths[j - 1] still holds the value for the
      // previous character; this lets a single row replace the two-row table.
      for (let j = windowLength; j >= 1; j--) {
        if (char !== suffix.charAt(j - 1)) {
          runLengths[j] = 0;
          continue;
        }
        const run = runLengths[j - 1] + 1;
        if (run > sequenceLength) {
          // The held-back characters are exactly the earlier part of this run,
          // so dropping them removes the whole repetition from the output.
          return;
        }
        runLengths[j] = run;
      }

      held += char;
      // Exactly one character is added per iteration, so at most one leaves.
      if (held.length > sequenceLength) {
        yield held.charAt(0);
        held = held.slice(1);
      }
    }
  }

  // Flush what is left, but never emit an empty chunk.
  if (held.length > 0) {
    yield held;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ describe("lineStream", () => {

describe("stopAtSimilarLine", () => {
it("should stop at the exact same line", async () => {
const lineToTest = "const x = 6;";
const lineToTest = "const x = 6";
const linesGenerator = await getLineGenerator([
"console.log();",
"const y = () => {};",
Expand All @@ -116,7 +116,7 @@ describe("lineStream", () => {
expect(mockFullStop).toHaveBeenCalledTimes(1);
});

it.only("should stop at a similar line", async () => {
it("should stop at a similar line", async () => {
const lineToTest = "const x = 6;";
const linesGenerator = await getLineGenerator([
"console.log();",
Expand Down
55 changes: 21 additions & 34 deletions core/autocomplete/filtering/streamTransforms/lineStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ export const LINES_TO_REMOVE_BEFORE_START = [
"<COMPLETION>",
"[CODE]",
"<START EDITING HERE>",
"{{FILL_HERE}}"
"{{FILL_HERE}}",
];

export const ENGLISH_START_PHRASES = [
Expand Down Expand Up @@ -548,21 +548,32 @@ export async function* showWhateverWeHaveAtXMs(
}
}

export async function* stopNCharsAfterClosingBracket(
export async function* noDoubleNewlineAfterClosingBracket(
lines: LineStream,
n: number = 20,
): LineStream {
const bracketTypeCounts = new Map<string, number>();
let charsToStopAt: number | null = null;

for await (const line of lines) {
let outputLine = "";
let i = 0;
if (line.trim() === "") {
// Double newline detected
// Check if any bracket counts are negative
let hasNegativeCount = false;
for (const count of bracketTypeCounts.values()) {
if (count < 0) {
hasNegativeCount = true;
break;
}
}
if (hasNegativeCount) {
// Stop the generator if we've closed brackets we didn't open
return;
}
}

while (i < line.length) {
const char = line[i];
yield line;

// Update bracket counts
// Update bracket counts
for (const char of line) {
if (BRACKETS[char]) {
// It's an opening bracket
const count = bracketTypeCounts.get(char) || 0;
Expand All @@ -571,32 +582,8 @@ export async function* stopNCharsAfterClosingBracket(
// It's a closing bracket
const openingBracket = BRACKETS_REVERSE[char];
const count = bracketTypeCounts.get(openingBracket) || 0;
const newCount = count - 1;
bracketTypeCounts.set(openingBracket, newCount);

if (newCount < 0 && charsToStopAt === null) {
// Unmatched closing bracket detected
charsToStopAt = n;
}
bracketTypeCounts.set(openingBracket, count - 1);
}

// Add the character to the output line
outputLine += char;

// If we've started counting down, decrement the remaining characters
if (charsToStopAt !== null) {
charsToStopAt -= 1;
if (charsToStopAt <= 0) {
// Yield the output line up to this point and stop the generator
yield outputLine;
return;
}
}

i += 1;
}

// Yield whatever we've accumulated for this line
yield outputLine;
}
}
Loading

0 comments on commit 0743780

Please sign in to comment.