Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Autodetect Docs V1 #3038

Merged
merged 21 commits into from
Nov 26, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions core/config/ProfileLifecycleManager.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { config } from "dotenv";

import {
BrowserSerializedContinueConfig,
ContinueConfig,
1 change: 1 addition & 0 deletions core/config/load.ts
Original file line number Diff line number Diff line change
@@ -527,6 +527,7 @@ function finalToBrowserConfig(
embeddingsProvider: final.embeddingsProvider?.id,
ui: final.ui,
experimental: final.experimental,
docs: final.docs,
};
}

6 changes: 6 additions & 0 deletions core/core.ts
Original file line number Diff line number Diff line change
@@ -735,6 +735,10 @@ export class Core {
return this.docsService.initStatuses();
});
on("docs/getSuggestedDocs", async (msg) => {
if (hasRequestedDocs) {
return;
} // TODO, remove, hack because of rerendering
hasRequestedDocs = true;
const suggestedDocs = await getAllSuggestedDocs(this.ide);
this.messenger.send("docs/suggestions", suggestedDocs);
});
@@ -842,3 +846,5 @@ export class Core {

// private
}

let hasRequestedDocs = false;
2 changes: 2 additions & 0 deletions core/index.d.ts
Original file line number Diff line number Diff line change
@@ -1189,6 +1189,7 @@ export interface BrowserSerializedContinueConfig {
reranker?: RerankerDescription;
experimental?: ExperimentalConfig;
analytics?: AnalyticsConfig;
docs?: SiteIndexingConfig[];
}

// DOCS SUGGESTIONS AND PACKAGE INFO
@@ -1210,6 +1211,7 @@ export type ParsedPackageInfo = {

export type PackageDetails = {
docsLink?: string;
docsLinkWarning?: string;
title?: string;
description?: string;
repo?: string;
39 changes: 13 additions & 26 deletions core/indexing/docs/suggestions/index.ts
Original file line number Diff line number Diff line change
@@ -63,42 +63,40 @@ export async function getAllSuggestedDocs(ide: IDE) {
);

// Parse package files and build map of language -> packages
const packagesByRegistry: Record<string, ParsedPackageInfo[]> = {};
const packagesByCrawler: Record<string, ParsedPackageInfo[]> = {};
PACKAGE_CRAWLERS.forEach((Crawler) => {
const crawler = new Crawler();
packagesByCrawler[crawler.packageRegistry] = [];
const packageFiles = packageFilesByRegistry[crawler.packageRegistry];
packageFiles.forEach((file) => {
const contents = fileContents.get(file.path);
if (!contents) {
return;
}
const packages = crawler.parsePackageFile(file, contents);
if (!packagesByRegistry[crawler.packageRegistry]) {
packagesByRegistry[crawler.packageRegistry] = [];
}
packagesByRegistry[crawler.packageRegistry].push(...packages);
packagesByCrawler[crawler.packageRegistry].push(...packages);
});
});

// Deduplicate packages per language
// TODO - this is where you would allow docs for different versions
// by e.g. using "name-version" as the map key instead of just name
// For now have not allowed
const languages = Object.keys(packagesByRegistry);
languages.forEach((language) => {
const packages = packagesByRegistry[language];
const registries = Object.keys(packagesByCrawler);
registries.forEach((registry) => {
const packages = packagesByCrawler[registry];
const uniquePackages = Array.from(
new Map(packages.map((pkg) => [pkg.name, pkg])).values(),
);
packagesByRegistry[language] = uniquePackages;
packagesByCrawler[registry] = uniquePackages;
});

// Get documentation links for all packages
const allDocsResults: PackageDocsResult[] = [];
await Promise.all(
PACKAGE_CRAWLERS.map(async (Crawler) => {
const crawler = new Crawler();
const packages = packagesByRegistry[crawler.packageRegistry];
const packages = packagesByCrawler[crawler.packageRegistry];
const docsByRegistry = await Promise.all(
packages.map(async (packageInfo) => {
try {
@@ -114,6 +112,11 @@ export async function getAllSuggestedDocs(ide: IDE) {
details: {
...details,
docsLink: details.docsLink,
docsLinkWarning: details.docsLink.includes("github.com")
? "Github docs not supported, find the docs site"
: details.docsLink.includes("docs")
? undefined
: "May not be a docs site, check the URL",
},
};
} catch (error) {
@@ -129,19 +132,3 @@ export async function getAllSuggestedDocs(ide: IDE) {
);
return allDocsResults;
}

// write me an interface PackageCrawler that contains:
// 1. property `language` to store a given language like "python" or "typescript"
// 2. has a method `getPackageFiles` which takes a list of file names and decides which ones match package/dependency files (e.g. package.json for typescript, requirements.txt for python, etc)
// 3. has a method `parsePackageFile` which returns a list of package names and versions from a relevant package file, in a standardized format like semver
// 4. has a method `getDocumentationLink` to check for documentation link for a given package (e.g. GET `https://registry.npmjs.org/<package>` and find docs field for typescript, documentation link in the package metadata for PyPi, etc.)
// Then, write typescript classes to implement this typescript interface for the languages "python" and "typescript"

// I want to present the user with a list of dependencies and allow them to select which ones to index (embed) documentation for.
// In order to prevent duplicate file reads, the process will be like this:
// 1. take in a list of filepaths called `filepaths`
// 2. loop an array of PackageCrawler classes to build a map of `language` (string) to `packageFilePaths` (string[])
// 3. Get unique filepaths from `packageFilePaths` and build a map of filepath to file contents using an existing `readFile` function, skipping file reads for paths already in the map
// Finally,
// Add a `` method to the interface and classes that returns
// Then, assemble the classes in an array, and write a function getAllSuggestedDocs that returns a map of `language` to an ar
2 changes: 1 addition & 1 deletion core/indexing/docs/suggestions/packageCrawlers/Python.ts
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@ export class PythonPackageCrawler implements PackageCrawler {
.split("\n")
.map((line) => {
const [name, version] = line.split("==");
return { name, version, packageFile: file, language: "python" };
return { name, version, packageFile: file, language: "py" };
})
.filter((pkg) => pkg.name && pkg.version);
}
1 change: 0 additions & 1 deletion core/protocol/core.ts
Original file line number Diff line number Diff line change
@@ -9,7 +9,6 @@ import type {
DiffLine,
FileSymbolMap,
IdeSettings,
IndexingStatusMap,
LLMFullCompletionOptions,
MessageContent,
ModelDescription,
4 changes: 2 additions & 2 deletions core/protocol/webview.ts
Original file line number Diff line number Diff line change
@@ -2,9 +2,9 @@ import { ConfigValidationError } from "../config/validation.js";

import type {
ContextItemWithId,
DocsSuggestions,
IndexingProgressUpdate,
IndexingStatus,
PackageDocsResult,
} from "../index.js";

export type ToWebviewFromIdeOrCoreProtocol = {
@@ -26,5 +26,5 @@ export type ToWebviewFromIdeOrCoreProtocol = {
getWebviewHistoryLength: [undefined, number];
signInToControlPlane: [undefined, void];
openDialogMessage: ["account", void];
"docs/suggestions": [DocsSuggestions, void];
"docs/suggestions": [PackageDocsResult[], void];
};
Loading