Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Autodetect Docs V1 #3038

Merged
merged 21 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions core/config/ProfileLifecycleManager.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { config } from "dotenv";

import {
BrowserSerializedContinueConfig,
ContinueConfig,
Expand Down
1 change: 1 addition & 0 deletions core/config/load.ts
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ function finalToBrowserConfig(
embeddingsProvider: final.embeddingsProvider?.id,
ui: final.ui,
experimental: final.experimental,
docs: final.docs,
};
}

Expand Down
17 changes: 12 additions & 5 deletions core/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { ControlPlaneClient } from "./control-plane/client";
import { streamDiffLines } from "./edit/streamDiffLines";
import { CodebaseIndexer, PauseToken } from "./indexing/CodebaseIndexer";
import DocsService from "./indexing/docs/DocsService";
import { getAllSuggestedDocs } from "./indexing/docs/suggestions";
import { defaultIgnoreFile } from "./indexing/ignore.js";
import Ollama from "./llm/llms/Ollama";
import { createNewPromptFileV2 } from "./promptFiles/v2/createNewPromptFile";
Expand All @@ -28,11 +29,7 @@ import { DevDataSqliteDb } from "./util/devdataSqlite";
import { fetchwithRequestOptions } from "./util/fetchWithOptions";
import { GlobalContext } from "./util/GlobalContext";
import historyManager from "./util/history";
import {
editConfigJson,
getConfigJsonPath,
setupInitialDotContinueDirectory,
} from "./util/paths";
import { editConfigJson, setupInitialDotContinueDirectory } from "./util/paths";
import { Telemetry } from "./util/posthog";
import { getSymbolsForManyFiles } from "./util/treeSitter";
import { TTS } from "./util/tts";
Expand Down Expand Up @@ -737,6 +734,14 @@ export class Core {
on("indexing/initStatuses", async (msg) => {
return this.docsService.initStatuses();
});
// Crawls the workspace for package manifests and pushes the resulting docs
// suggestions to the webview via the "docs/suggestions" message.
on("docs/getSuggestedDocs", async (msg) => {
  if (hasRequestedDocs) {
    return;
  } // TODO, remove, hack because of rerendering
  hasRequestedDocs = true;
  try {
    const suggestedDocs = await getAllSuggestedDocs(this.ide);
    this.messenger.send("docs/suggestions", suggestedDocs);
  } catch (e) {
    // Release the latch so a later request can retry; otherwise a single
    // transient failure would disable suggestions until restart.
    hasRequestedDocs = false;
    throw e;
  }
});
//

on("didChangeSelectedProfile", (msg) => {
Expand Down Expand Up @@ -841,3 +846,5 @@ export class Core {

// private
}

// Module-level latch read and set by the "docs/getSuggestedDocs" handler so
// that only the first request triggers a crawl; later requests return early.
// Marked in the handler as a temporary hack around webview rerendering.
let hasRequestedDocs = false;
38 changes: 38 additions & 0 deletions core/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1220,4 +1220,42 @@ export interface BrowserSerializedContinueConfig {
reranker?: RerankerDescription;
experimental?: ExperimentalConfig;
analytics?: AnalyticsConfig;
docs?: SiteIndexingConfig[];
}

// DOCS SUGGESTIONS AND PACKAGE INFO
/** A file identified by both its path and its name. */
export interface FilePathAndName {
/** Path of the file; this is what gets passed to `ide.readFile`. */
path: string;
/** File name, i.e. the last segment of `path`. */
name: string;
}

/** A package manifest file tagged with the registry its packages come from. */
export interface PackageFilePathAndName extends FilePathAndName {
packageRegistry: string; // e.g. npm, pypi
}

/** One dependency entry extracted from a package manifest file. */
export type ParsedPackageInfo = {
/** Package name as written in the manifest. */
name: string;
/** The manifest file this entry was parsed from. */
packageFile: PackageFilePathAndName;
/** Short language tag assigned by the crawler, e.g. "py", "ts" or "js". */
language: string;
/** Version string as written in the manifest. */
version: string;
};

/**
 * Metadata a crawler looked up for a package in its registry.
 * Every field is optional because registries may omit any of them.
 */
export type PackageDetails = {
/** URL believed to point at the package's documentation. */
docsLink?: string;
/** Human-readable caveat about `docsLink`, when one applies. */
docsLinkWarning?: string;
title?: string;
description?: string;
/** Source repository URL. */
repo?: string;
license?: string;
};

/** `PackageDetails` for which a documentation link is guaranteed present. */
export type PackageDetailsSuccess = PackageDetails & {
docsLink: string;
};

/**
 * Outcome of looking up docs for one package: either an `error` message or
 * successful `details`, never both (the `never` members make the two
 * variants mutually exclusive).
 */
export type PackageDocsResult = {
packageInfo: ParsedPackageInfo;
} & (
| { error: string; details?: never }
| { details: PackageDetailsSuccess; error?: never }
);
134 changes: 134 additions & 0 deletions core/indexing/docs/suggestions/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import {
PackageDocsResult,
FilePathAndName,
PackageFilePathAndName,
IDE,
PackageDetails,
ParsedPackageInfo,
} from "../../..";
import { walkDir } from "../../walkDir";

import { PythonPackageCrawler } from "./packageCrawlers/Python";
import { NodePackageCrawler } from "./packageCrawlers/TsJs";

// Crawler classes consulted when building docs suggestions; each entry is
// instantiated with `new` by getAllSuggestedDocs.
const PACKAGE_CRAWLERS = [NodePackageCrawler, PythonPackageCrawler];

/**
 * Contract implemented by each registry-specific crawler: pick out the
 * manifest files it understands, parse them into package entries, and look
 * up registry metadata for a single package.
 */
export interface PackageCrawler {
/** Identifier of the registry this crawler targets, e.g. "npm" or "pypi". */
packageRegistry: string;
/** Returns the subset of `files` this crawler can parse, tagged with its registry. */
getPackageFiles(files: FilePathAndName[]): PackageFilePathAndName[];
/** Parses the text `contents` of one manifest `file` into package entries. */
parsePackageFile(
file: PackageFilePathAndName,
contents: string,
): ParsedPackageInfo[];
/** Looks up metadata (docs link, description, ...) for one package. */
getPackageDetails(packageInfo: ParsedPackageInfo): Promise<PackageDetails>;
}

/**
 * Builds documentation suggestions for every package referenced by the
 * manifest files found in the IDE's workspace directories.
 *
 * Steps: walk the workspace, let each crawler pick its manifest files, read
 * each distinct file once, parse it, de-duplicate packages by name per
 * crawler, then query the registry for each package.
 *
 * A manifest that cannot be read or parsed is skipped with a warning instead
 * of rejecting the whole call, so one bad file does not suppress every
 * suggestion.
 *
 * @param ide IDE bridge used to list workspace directories and read files.
 * @returns One result per unique package, grouped in `PACKAGE_CRAWLERS`
 *   order. Lookup failures are reported through the result's `error` field.
 */
export async function getAllSuggestedDocs(
  ide: IDE,
): Promise<PackageDocsResult[]> {
  const workspaceDirs = await ide.getWorkspaceDirs();
  const walked = await Promise.all(
    workspaceDirs.map((dir) => walkDir(dir, ide)),
  );
  // TODO only get files, not dirs. Not critical for now
  const allFiles: FilePathAndName[] = walked.flat().map((path) => ({
    path,
    name: path.split(/[\\/]/).pop() ?? path,
  }));

  // Instantiate each crawler once and reuse it for every phase below.
  const crawlerFiles = PACKAGE_CRAWLERS.map((Crawler) => {
    const crawler: PackageCrawler = new Crawler();
    return { crawler, files: crawler.getPackageFiles(allFiles) };
  });

  // Read every distinct manifest exactly once, tolerating unreadable files.
  const uniqueFilePaths = Array.from(
    new Set(
      crawlerFiles.flatMap(({ files }) => files.map((file) => file.path)),
    ),
  );
  const fileContents = new Map<string, string>();
  await Promise.all(
    uniqueFilePaths.map(async (path) => {
      try {
        fileContents.set(path, await ide.readFile(path));
      } catch (e) {
        console.warn(`Skipping unreadable package file ${path}`, e);
      }
    }),
  );

  const resultsPerCrawler = await Promise.all(
    crawlerFiles.map(({ crawler, files }) => {
      const parsed: ParsedPackageInfo[] = [];
      for (const file of files) {
        const contents = fileContents.get(file.path);
        if (!contents) {
          continue;
        }
        try {
          parsed.push(...crawler.parsePackageFile(file, contents));
        } catch (e) {
          console.warn(`Skipping unparsable package file ${file.path}`, e);
        }
      }

      // Deduplicate packages per registry.
      // TODO - this is where you would allow docs for different versions
      // by e.g. using "name-version" as the map key instead of just name
      // For now have not allowed
      const uniquePackages = Array.from(
        new Map(parsed.map((pkg) => [pkg.name, pkg])).values(),
      );

      return Promise.all(
        uniquePackages.map((pkg) => getPackageDocsResult(crawler, pkg)),
      );
    }),
  );

  // Flatten in crawler order so the output does not depend on network timing.
  return resultsPerCrawler.flat();
}

/** Looks up one package and converts the outcome into a `PackageDocsResult`. */
async function getPackageDocsResult(
  crawler: PackageCrawler,
  packageInfo: ParsedPackageInfo,
): Promise<PackageDocsResult> {
  try {
    const details = await crawler.getPackageDetails(packageInfo);
    const { docsLink } = details;
    if (!docsLink) {
      return {
        packageInfo,
        error: `No documentation link found for ${packageInfo.name}`,
      };
    }

    // Heuristic warnings to help the user judge the suggested link.
    let docsLinkWarning: string | undefined;
    if (docsLink.includes("github.com")) {
      docsLinkWarning = "Github docs not supported, find the docs site";
    } else if (!docsLink.includes("docs")) {
      docsLinkWarning = "May not be a docs site, check the URL";
    }

    return {
      packageInfo,
      details: { ...details, docsLink, docsLinkWarning },
    };
  } catch {
    return {
      packageInfo,
      error: `Error getting package details for ${packageInfo.name}`,
    };
  }
}
63 changes: 63 additions & 0 deletions core/indexing/docs/suggestions/packageCrawlers/Python.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import { PackageCrawler } from "..";
import {
FilePathAndName,
PackageDetails,
PackageFilePathAndName,
ParsedPackageInfo,
} from "../../../..";

/**
 * Package crawler for Python projects: finds `requirements.txt` and
 * `Pipfile` manifests, extracts their packages, and looks each one up on PyPI.
 */
export class PythonPackageCrawler implements PackageCrawler {
  packageRegistry = "pypi";

  // `name[extras] == version`, stopping the version at whitespace, `;` or `,`.
  private static readonly REQUIREMENT_PIN =
    /^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?:\[[^\]]*\])?\s*={2,3}\s*([^\s;,]+)/;
  // A comment starts at `#` when it begins the line or follows whitespace.
  private static readonly REQUIREMENT_COMMENT = /(^|\s)#.*$/;
  // `[section]` or `[[section]]` header in a Pipfile (TOML).
  private static readonly PIPFILE_SECTION = /^\[+\s*([^\[\]]+?)\s*\]+$/;
  // `name = <spec>` entry in a Pipfile package section.
  private static readonly PIPFILE_ENTRY =
    /^["']?([A-Za-z0-9][A-Za-z0-9._-]*)["']?\s*=\s*(.+)$/;
  private static readonly PIPFILE_STRING_SPEC = /^["']([^"']*)["']/;
  private static readonly PIPFILE_TABLE_VERSION =
    /\bversion\s*=\s*["']([^"']*)["']/;

  /** Selects the `requirements.txt` and `Pipfile` files from `files`. */
  getPackageFiles(files: FilePathAndName[]): PackageFilePathAndName[] {
    return files
      .filter(
        (file) => file.name === "requirements.txt" || file.name === "Pipfile",
      )
      .map((file) => ({
        ...file,
        packageRegistry: this.packageRegistry,
      }));
  }

  /**
   * Parses a manifest into package entries, choosing the parser from the
   * file name: `Pipfile` is TOML-like, anything else is treated as a
   * requirements.txt-formatted file.
   */
  parsePackageFile(
    file: PackageFilePathAndName,
    contents: string,
  ): ParsedPackageInfo[] {
    return file.name === "Pipfile"
      ? this.parsePipfile(file, contents)
      : this.parseRequirements(file, contents);
  }

  /**
   * Extracts `name==version` pins from requirements.txt text. Unpinned
   * requirements, option lines (`-r`, `-e`, ...), comments and blank lines
   * are skipped; extras and environment markers are dropped.
   */
  private parseRequirements(
    file: PackageFilePathAndName,
    contents: string,
  ): ParsedPackageInfo[] {
    const packages: ParsedPackageInfo[] = [];
    for (const rawLine of contents.split(/\r?\n/)) {
      const line = rawLine
        .replace(PythonPackageCrawler.REQUIREMENT_COMMENT, "")
        .trim();
      const match = PythonPackageCrawler.REQUIREMENT_PIN.exec(line);
      if (match) {
        packages.push({
          name: match[1],
          version: match[2],
          packageFile: file,
          language: "py",
        });
      }
    }
    return packages;
  }

  /**
   * Extracts entries from the `[packages]` and `[dev-packages]` sections of
   * a Pipfile. Supports `name = "spec"` and inline tables carrying a
   * `version` key; entries without a version (e.g. git dependencies) are
   * skipped. A leading `==` is stripped, other specifiers are kept verbatim.
   */
  private parsePipfile(
    file: PackageFilePathAndName,
    contents: string,
  ): ParsedPackageInfo[] {
    const packages: ParsedPackageInfo[] = [];
    let inPackageSection = false;
    for (const rawLine of contents.split(/\r?\n/)) {
      const line = rawLine.trim();
      if (!line || line.startsWith("#")) {
        continue;
      }
      const section = PythonPackageCrawler.PIPFILE_SECTION.exec(line);
      if (section) {
        inPackageSection =
          section[1] === "packages" || section[1] === "dev-packages";
        continue;
      }
      if (!inPackageSection) {
        continue;
      }
      const entry = PythonPackageCrawler.PIPFILE_ENTRY.exec(line);
      if (!entry) {
        continue;
      }
      const name = entry[1];
      const spec = entry[2];
      const versionMatch = spec.startsWith("{")
        ? PythonPackageCrawler.PIPFILE_TABLE_VERSION.exec(spec)
        : PythonPackageCrawler.PIPFILE_STRING_SPEC.exec(spec);
      const version = versionMatch?.[1].replace(/^={2,3}\s*/, "").trim();
      if (!version) {
        continue;
      }
      packages.push({ name, version, packageFile: file, language: "py" });
    }
    return packages;
  }

  /**
   * Fetches the package's JSON metadata from PyPI.
   *
   * @throws Error when the registry responds with a non-OK status.
   */
  async getPackageDetails(
    packageInfo: ParsedPackageInfo,
  ): Promise<PackageDetails> {
    // Fetch metadata from PyPI to find the documentation link
    const response = await fetch(
      `https://pypi.org/pypi/${encodeURIComponent(packageInfo.name)}/json`,
    );
    if (!response.ok) {
      throw new Error(`Could not fetch data for package ${packageInfo.name}`);
    }
    const data = await response.json();
    const homePage = data?.info?.home_page as string | undefined;

    return {
      // Prefer the explicit Documentation URL; fall back to the home page.
      docsLink:
        (data?.info?.project_urls?.Documentation as string | undefined) ??
        homePage,
      title: data?.info?.name as string | undefined,
      description: data?.info?.summary as string | undefined,
      repo:
        (data?.info?.project_urls?.Repository as string | undefined) ??
        homePage,
      license: data?.info?.license as string | undefined,
    };
  }
}
72 changes: 72 additions & 0 deletions core/indexing/docs/suggestions/packageCrawlers/TsJs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { PackageCrawler } from "..";
import {
FilePathAndName,
PackageDetails,
PackageFilePathAndName,
ParsedPackageInfo,
} from "../../../..";

/**
 * Package crawler for JavaScript/TypeScript projects: finds `package.json`
 * files, lists their dependencies, and looks each one up in the npm registry.
 */
export class NodePackageCrawler implements PackageCrawler {
  packageRegistry = "npm";

  /** Selects the `package.json` files from `files`. */
  getPackageFiles(files: FilePathAndName[]): PackageFilePathAndName[] {
    // For Javascript/TypeScript, we look for package.json file
    return files
      .filter((file) => file.name === "package.json")
      .map((file) => ({
        ...file,
        packageRegistry: this.packageRegistry,
      }));
  }

  /**
   * Parses `package.json` text into one entry per dependency and
   * devDependency. `@types/*` packages are dropped from the result; their
   * presence (or any dependency whose name contains "typescript") marks all
   * returned entries as "ts" instead of "js".
   *
   * Returns an empty list, with a warning, when `contents` is not valid JSON
   * or is not a JSON object, so one broken manifest cannot abort a crawl.
   */
  parsePackageFile(
    file: PackageFilePathAndName,
    contents: string,
  ): ParsedPackageInfo[] {
    let manifest: unknown;
    try {
      manifest = JSON.parse(contents);
    } catch {
      console.warn(`Skipping ${file.path}: not valid JSON`);
      return [];
    }
    if (typeof manifest !== "object" || manifest === null) {
      return [];
    }
    const { dependencies, devDependencies } = manifest as {
      dependencies?: unknown;
      devDependencies?: unknown;
    };
    const entries = [
      ...NodePackageCrawler.dependencyEntries(dependencies),
      ...NodePackageCrawler.dependencyEntries(devDependencies),
    ];

    // Filter out types packages and check if typescript is present
    let foundTypes = false;
    const filtered = entries.filter(([name]) => {
      if (name.startsWith("@types/")) {
        foundTypes = true;
        return false;
      }
      if (name.includes("typescript")) {
        foundTypes = true;
      }
      return true;
    });

    // Decided only after the whole list was scanned, so it applies uniformly.
    const language = foundTypes ? "ts" : "js";
    return filtered.map(([name, version]) => ({
      name,
      version,
      packageFile: file,
      language,
    }));
  }

  /**
   * Fetches the package's metadata document from the npm registry.
   *
   * @throws Error when the registry responds with a non-OK status.
   */
  async getPackageDetails(
    packageInfo: ParsedPackageInfo,
  ): Promise<PackageDetails> {
    const { name } = packageInfo;
    // Fetch metadata from the NPM registry to find the documentation link
    const response = await fetch(`https://registry.npmjs.org/${name}`);
    if (!response.ok) {
      throw new Error(`Could not fetch data for package ${name}`);
    }
    const data = await response.json();
    return {
      docsLink: data.homepage as string | undefined,
      title: name, // package.json doesn't have specific title field
      description: data.description as string | undefined,
      repo: NodePackageCrawler.repositoryUrl(data.repository),
      license: data.license as string | undefined,
    };
  }

  /** Returns the `[name, version]` pairs of a dependency map, or `[]` if it is not an object. */
  private static dependencyEntries(section: unknown): [string, string][] {
    if (typeof section !== "object" || section === null) {
      return [];
    }
    return Object.entries(section).filter(
      (entry): entry is [string, string] => typeof entry[1] === "string",
    );
  }

  /**
   * Normalizes the `repository` field, which may be a shorthand string, an
   * object with a `url` property, or (defensively) an array of either.
   */
  private static repositoryUrl(repository: unknown): string | undefined {
    if (typeof repository === "string") {
      return repository;
    }
    if (Array.isArray(repository)) {
      return NodePackageCrawler.repositoryUrl(repository[0]);
    }
    if (typeof repository === "object" && repository !== null) {
      const url = (repository as { url?: unknown }).url;
      return typeof url === "string" ? url : undefined;
    }
    return undefined;
  }
}
2 changes: 1 addition & 1 deletion core/protocol/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import type {
DiffLine,
FileSymbolMap,
IdeSettings,
IndexingStatusMap,
LLMFullCompletionOptions,
MessageContent,
ModelDescription,
Expand Down Expand Up @@ -167,6 +166,7 @@ export type ToCoreFromIdeOrWebviewProtocol = {
"indexing/abort": [{ type: string; id: string }, void];
"indexing/setPaused": [{ type: string; id: string; paused: boolean }, void];
"indexing/initStatuses": [undefined, void];
"docs/getSuggestedDocs": [undefined, void];

addAutocompleteModel: [{ model: ModelDescription }, void];

Expand Down
Loading
Loading