// --- Patch residue preserved from this span (other files touched by the same diff) ---
// .gitignore (index 75c402b..623a217, @@ -15,6 +15,7 @@): adds `*.bak` under "# misc".
// assets/chrome.png: new binary file (index 0000000..d01b5c2).
// package.json (index 0638714..75631ab):
//   - removes the dependency "uuid": "^13.0.0" ("util": "^0.12.5" stays)
//   - adds the manifest permissions "tabs", "history", "activeTab", "scripting"
//   - adds a trailing newline at end of file
// src/background.ts (index c86f56f..19b8398, @@ -1,3 +1,170 @@): the code below is added.

/**
 * Returns the hostname of `url`, or `fallback` when the URL is missing or
 * cannot be parsed. Never throws, so it is safe to call from error paths.
 */
function tabHostnameOr(url: string | undefined, fallback: string): string {
  if (!url) return fallback;
  try {
    return new URL(url).hostname;
  } catch {
    return fallback;
  }
}

// This is used to get all tabs in the browser, and some of their content
chrome.runtime.onMessage.addListener((request, _sender, sendResponse) => {
  // Handle getTabs request for Chrome Tabs connection
  if (request.action === "getTabs") {
    chrome.tabs.query({}, (tabs) => {
      if (chrome.runtime.lastError) {
        sendResponse({ error: chrome.runtime.lastError.message });
      } else {
        sendResponse({ tabs });
      }
    });
    // Returning true keeps the message channel open for the async sendResponse.
    return true;
  }

  // Handle getTabsWithContent request
  if (request.action === "getTabsWithContent") {
    chrome.tabs.query({}, (tabs) => {
      if (chrome.runtime.lastError) {
        sendResponse({ error: chrome.runtime.lastError.message });
        return;
      }

      void (async () => {
        try {
          // Tabs are independent, so scrape them concurrently; Promise.all
          // keeps the results in the same order as `tabs`.
          const tabsWithContent = await Promise.all(
            tabs.map(async (tab) => {
              const tabData = { ...tab, pageContent: '' };

              try {
                // Browser-internal and extension pages are skipped up front.
                if (
                  tab.id &&
                  tab.url &&
                  !tab.url.startsWith('chrome://') &&
                  !tab.url.startsWith('chrome-extension://')
                ) {
                  const results = await chrome.scripting.executeScript({
                    target: { tabId: tab.id },
                    func: getPageContent,
                  });

                  const extracted = results?.[0]?.result;
                  if (extracted) {
                    tabData.pageContent = extracted;
                  }
                }
              } catch (error) {
                console.error(`Could not get content for tab ${tab.id}:`, error);
                // Fallback description; the hostname lookup must not throw here,
                // otherwise the whole request would be left unanswered.
                tabData.pageContent = `Content from ${tabHostnameOr(tab.url, 'unknown site')} - unable to read page content`;
              }

              return tabData;
            })
          );

          sendResponse({ tabs: tabsWithContent });
        } catch (error) {
          // Always answer, so the sender's promise settles even on unexpected failures.
          sendResponse({ error: error instanceof Error ? error.message : String(error) });
        }
      })();
    });
    return true;
  }

  // Don't interfere with other message handlers
  return false;
});

/**
 * Builds a short textual description of the current page: the trimmed title
 * followed by a sample of the page text.
 *
 * This function is passed as `func` to `chrome.scripting.executeScript`, so it
 * runs inside the target page. Keep it self-contained: it must not reference
 * helpers or variables defined outside its own body.
 *
 * @param maxChars Maximum number of characters of page text to sample.
 * @returns A description string; falls back to the title or
 *          `Content from <hostname>` if extraction throws.
 */
function getPageContent(maxChars = 300) {
  try {
    const title = (document.title || '').trim();
    const url = window.location.href;
    const domain = window.location.hostname;

    // First choice: the text of <body> (innerText, then textContent).
    let allText = '';
    if (document.body) {
      allText = document.body.innerText || document.body.textContent || '';
    }

    // If that produced little or nothing, walk the text nodes instead.
    if (allText.length < 100) {
      const skippedTags = ['script', 'style', 'noscript', 'iframe', 'object'];

      const walker = document.createTreeWalker(
        document.body || document.documentElement,
        NodeFilter.SHOW_TEXT,
        {
          acceptNode(node) {
            const parent = node.parentElement;
            if (!parent) return NodeFilter.FILTER_REJECT;

            // Skip script, style, and other non-content elements.
            if (skippedTags.includes(parent.tagName.toLowerCase())) {
              return NodeFilter.FILTER_REJECT;
            }

            // Skip text whose direct parent is hidden.
            const style = window.getComputedStyle(parent);
            if (style.display === 'none' || style.visibility === 'hidden') {
              return NodeFilter.FILTER_REJECT;
            }

            // Only accept text nodes with at least 3 non-whitespace-trimmed characters.
            const text = node.textContent?.trim() || '';
            return text.length < 3 ? NodeFilter.FILTER_REJECT : NodeFilter.FILTER_ACCEPT;
          },
        }
      );

      const pieces: string[] = [];
      for (let node = walker.nextNode(); node !== null; node = walker.nextNode()) {
        const text = node.textContent?.trim();
        if (text) pieces.push(text);
      }

      allText = pieces.join(' ');
    }

    // `\s` already matches newlines and tabs, so one pass collapses all whitespace.
    allText = allText.replace(/\s+/g, ' ').trim();

    const textSample = allText.substring(0, maxChars);

    // Combine title and text content.
    let result = title ? `${title}. ` : '';

    if (textSample.length > 10) {
      let contentText = textSample;
      // Drop the title from the sample when the page text repeats it up front.
      if (title && textSample.toLowerCase().startsWith(title.toLowerCase())) {
        contentText = textSample.substring(title.length).trim();
        if (contentText.startsWith('.') || contentText.startsWith('-')) {
          contentText = contentText.substring(1).trim();
        }
      }

      if (contentText.length > 10) {
        result += contentText;
      }
    }

    // Generic fallback if no meaningful content was found.
    if (result.trim().length < 20) {
      result = `Content from ${domain} - ${title || url.split('/').pop() || 'webpage'}`;
    }

    return result;
  } catch (error) {
    console.error('Error extracting page content:', error);

    // Simple fallback
    const domain = window.location.hostname;
    const title = document.title || '';

    return title || `Content from ${domain}`;
  }
}

// (unchanged context that follows in src/background.ts; its body continues outside this patch view)
// This is used to register cookies in the browser
// chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { if (request.action === "setCookie") { ...
// diff --git
a/src/connection_manager.tsx b/src/connection_manager.tsx index 2e48d7a..37cec77 100644 --- a/src/connection_manager.tsx +++ b/src/connection_manager.tsx @@ -6,9 +6,10 @@ import { GoogleScholarConnection } from "./connections/googleScholar/connection" import { WikipediaSegmentConnection } from "./connections/wikipediaSegment/connection"; import { GmailConnection } from "./connections/Gmail/connection"; import { LinkedInConnection } from "./connections/Linkedin/connection"; +import { ChromeTabsConnection } from "./connections/chromeTabs/connection"; -export const CONNECTIONS = [GmailConnection, WikipediaSegmentConnection, WikipediaReferencesConnection, GoogleConnection, PubmedConnection, GoogleDocsConnection, GoogleScholarConnection,LinkedInConnection]; +export const CONNECTIONS = [GmailConnection, WikipediaSegmentConnection, WikipediaReferencesConnection, GoogleConnection, PubmedConnection, GoogleDocsConnection, GoogleScholarConnection,LinkedInConnection, ChromeTabsConnection]; export const searchConnections = (url: string, ) => { const connections = CONNECTIONS.filter(connection => connection.trigger(url)); diff --git a/src/connections/Linkedin/connection.tsx b/src/connections/Linkedin/connection.tsx index 63422d8..4917301 100644 --- a/src/connections/Linkedin/connection.tsx +++ b/src/connections/Linkedin/connection.tsx @@ -3,7 +3,7 @@ import { GenerationProgress } from "../types"; import { getSpacePortal, registerAuthCookies, reqSpaceCreation } from "../../driver"; import wikiIcon from "data-base64:../../../assets/wiki.png"; -import { v4 as uuidv4 } from 'uuid'; +import { getUuidV4 } from "../../driver"; @@ -65,7 +65,7 @@ const createSpace = async ( const company = row[companyIdx]; const url = linkIdx !== -1 ? 
row[linkIdx] : ""; result.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: `Applied Job: ${title}`, text: `Applied to ${title} at ${company}`, link: url, @@ -122,7 +122,7 @@ const createSpace = async ( const name = document.querySelector("h1.text-heading-xlarge")?.textContent?.trim() || "Unknown Name"; const headline = document.querySelector(".text-body-medium.break-words")?.textContent?.trim() || ""; extractedData.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: name, text: sanitize(headline), link: window.location.href, @@ -139,7 +139,7 @@ const createSpace = async ( const about = aboutSection?.innerText?.trim(); if (about) { extractedData.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: "About", text: sanitize(about), link: window.location.href, @@ -161,7 +161,7 @@ const createSpace = async ( const description = entry.innerText?.trim(); if (jobTitle && description) { extractedData.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: `Experience: ${jobTitle}`, text: sanitize(description), link: window.location.href, @@ -184,7 +184,7 @@ const createSpace = async ( const eduDetails = entry.innerText?.trim(); if (school && eduDetails) { extractedData.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: `Education: ${school}`, text: sanitize(eduDetails), link: window.location.href, @@ -206,7 +206,7 @@ const createSpace = async ( if (!seen.has(connectionUrl)) { seen.add(connectionUrl); extractedData.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: `Connection: ${connectionName}`, text: `Connected with ${connectionName}`, link: connectionUrl, @@ -232,7 +232,7 @@ if (activitySection) { const postContent = card.textContent?.trim().replace(/\s+/g, " ") || "LinkedIn Activity"; extractedData.push({ - uuid: uuidv4(), + uuid: getUuidV4(), title: `Activity: ${postContent.slice(0, 40)}...`, text: postContent, link: postUrl, @@ -269,7 +269,7 @@ const getMessagesFromIframe = async (): Promise => { : "https://www.linkedin.com/messaging/"; return { - uuid: uuidv4(), 
// --- Patch residue preserved from this span ---
// src/connections/Linkedin/connection.tsx (continued): in the hunks for
// getMessagesFromIframe (`Message with ${name}` item) and getFollowedCompanies
// (@@ -312,7 +312,7 @@, `Following: ${name}` item), `uuid: uuidv4()` is replaced
// by `uuid: getUuidV4()`.
// src/connections/chromeTabs/connection.tsx: new file (mode 100644,
// index 0000000..e76b878, @@ -0,0 +1,280 @@); its content follows.

import type { MantisConnection, injectUIType, onMessageType, registerListenersType, setProgressType, establishLogSocketType } from "../types";
import { GenerationProgress } from "../types";

import chromeIcon from "data-base64:../../../assets/chrome.png";
import { getSpacePortal, registerAuthCookies, reqSpaceCreation } from "../../driver";

/** Minimum number of usable tabs required before a space is requested. */
const MIN_TABS = 3;

/** Delay before each retry of a timed-out space creation request. */
const RETRY_DELAY_MS = 3000;

const GRADIENT_IDLE = "linear-gradient(90deg, #4285f4, #34a853)";
const GRADIENT_ACTIVE = "linear-gradient(90deg, #1a73e8, #137333)";

/** The connection activates on Google search result pages. */
const trigger = (url: string) => {
    return url.includes("google.com/search");
}

interface TabWithContent extends chrome.tabs.Tab {
    pageContent?: string;
}

/** One row of the dataset sent to `reqSpaceCreation`. */
interface TabRow {
    title: string;
    semantic_title: string;
    link: string;
    snippet: string;
}

/** Converts an unknown thrown value into a message string. */
const toErrorMessage = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

/** Returns the hostname of `url`, or 'unknown' when it cannot be parsed. */
const hostnameOrUnknown = (url: string): string => {
    try {
        return new URL(url).hostname;
    } catch {
        return 'unknown';
    }
};

/**
 * Sends the "getTabsWithContent" message via `chrome.runtime.sendMessage`
 * and resolves with the `tabs` array of the response.
 *
 * Rejects when `chrome.runtime.lastError` is set, when no response object is
 * delivered, or when the response carries an `error` field.
 */
const getTabsWithContentViaMessage = (): Promise<TabWithContent[]> => {
    return new Promise((resolve, reject) => {
        chrome.runtime.sendMessage(
            { action: "getTabsWithContent" },
            (response?: { tabs?: TabWithContent[]; error?: string }) => {
                if (chrome.runtime.lastError) {
                    reject(new Error(chrome.runtime.lastError.message));
                } else if (!response) {
                    // Without this guard `response.error` would throw inside the
                    // callback and the promise would never settle.
                    reject(new Error('No response received for getTabsWithContent'));
                } else if (response.error) {
                    reject(new Error(response.error));
                } else {
                    resolve(response.tabs || []);
                }
            }
        );
    });
};

/**
 * Gathers the open tabs, requests a space from them and injects the UI.
 *
 * Progress is reported through `setProgress` at each stage.
 *
 * @returns `{ spaceId, createdWidget }` on success, or `null` after showing an
 *          on-page error when there are too few tabs or the server reports
 *          the dataset as too small.
 * @throws Any other error, after logging it.
 */
const createSpace = async (injectUI: injectUIType, setProgress: setProgressType, onMessage: onMessageType, registerListeners: registerListenersType, establishLogSocket: establishLogSocketType) => {
    setProgress(GenerationProgress.GATHERING_DATA);

    const extractedData: TabRow[] = [];

    try {
        // Get tabs via message passing
        const tabs = await getTabsWithContentViaMessage();

        if (tabs.length === 0) {
            throw new Error('No tabs found');
        }

        // One row per tab that has both a title and a URL (no de-duplication, no domain grouping).
        tabs.forEach((tab, index) => {
            if (!tab.title || !tab.url) return;

            const pageContent = tab.pageContent || `Page from ${hostnameOrUnknown(tab.url)}`;

            extractedData.push({
                title: tab.title,
                semantic_title: `${tab.active ? 'Active' : 'Background'} tab: ${tab.title}`,
                link: tab.url,
                // `index` is the position in the full tab list, including skipped tabs.
                snippet: `Tab ${index + 1}: ${pageContent}`
            });
        });

        // Check if we have enough data
        if (extractedData.length < MIN_TABS) {
            throw new Error('Not enough tabs open for meaningful space creation');
        }

        setProgress(GenerationProgress.CREATING_SPACE);

        // Use automatic retry for space creation
        const spaceData = await createSpaceWithAutoRetry(extractedData, establishLogSocket, `Chrome Tabs Space (${tabs.length} tabs)`);

        setProgress(GenerationProgress.INJECTING_UI);

        const spaceId = spaceData.space_id;
        const createdWidget = await injectUI(spaceId, onMessage, registerListeners);

        setProgress(GenerationProgress.COMPLETED);

        return { spaceId, createdWidget };

    } catch (error) {
        console.error('Error in Chrome Tabs connection:', error);

        const errorMessage = toErrorMessage(error);
        if (errorMessage.includes('Dataset too small') ||
            errorMessage.includes('minimum 100 rows are required')) {
            showDatasetTooSmallError(extractedData.length);
            return null;
        }

        if (errorMessage.includes('Not enough tabs') || errorMessage.includes('No tabs found')) {
            showNoTabsError();
            return null;
        }

        throw error;
    }
}

/** True when the error message contains '504', 'timeout' or 'Gateway Time-out'. */
const isGatewayTimeout = (message: string): boolean =>
    message.includes('504') ||
    message.includes('timeout') ||
    message.includes('Gateway Time-out');

/**
 * Calls `reqSpaceCreation`, retrying when the failure looks like a gateway
 * timeout. Every attempt after the first is preceded by a fixed delay.
 *
 * @param maxRetries Total number of attempts (default 5).
 * @throws The last error when it is not a timeout or attempts are exhausted;
 *         an `Error` when `maxRetries` is below 1 (no attempt is made).
 */
const createSpaceWithAutoRetry = async (extractedData: { title: string; semantic_title: string; link: string; snippet: string; }[], establishLogSocket: establishLogSocketType, title: string, maxRetries = 5) => {
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
        try {
            if (attempt > 1) {
                // Wait for server to finish background processing
                await new Promise(resolve => setTimeout(resolve, RETRY_DELAY_MS));
            }

            return await reqSpaceCreation(extractedData, {
                "title": "title",
                "semantic_title": "semantic",
                "link": "links",
                "snippet": "semantic"
            }, establishLogSocket, title);

        } catch (error) {
            const retryable = isGatewayTimeout(toErrorMessage(error)) && attempt < maxRetries;
            if (!retryable) {
                // Not a timeout, or out of retries.
                throw error;
            }
        }
    }

    // Only reachable when maxRetries < 1; fail loudly instead of returning undefined.
    throw new Error(`Space creation was not attempted (maxRetries=${maxRetries})`);
};

/**
 * Shows a fixed-position notification in the top-right corner of the page and
 * removes it after `durationMs`. Text is set through `textContent`, so it is
 * never interpreted as HTML.
 */
const showErrorToast = (options: { background: string; heading: string; message: string; durationMs: number }) => {
    const toast = document.createElement('div');
    toast.style.cssText = `
        position: fixed;
        top: 20px;
        right: 20px;
        background: ${options.background};
        color: white;
        padding: 20px;
        border-radius: 12px;
        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
        z-index: 10000;
        max-width: 400px;
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    `;

    const heading = document.createElement('div');
    heading.style.cssText = 'font-weight: 600; margin-bottom: 8px;';
    heading.textContent = options.heading;

    const message = document.createElement('div');
    message.textContent = options.message;

    toast.append(heading, message);
    document.body.appendChild(toast);
    setTimeout(() => toast.remove(), options.durationMs);
};

// Error handlers

/** Tells the user that `dataCount` items are fewer than the 100 required. */
const showDatasetTooSmallError = (dataCount: number) => {
    showErrorToast({
        background: 'linear-gradient(135deg, #ff6b6b, #ee5a52)',
        heading: 'Not Enough Data',
        message: `We found ${dataCount} items, but need at least 100 to create a meaningful space.`,
        durationMs: 8000,
    });
};

/** Tells the user that too few tabs could be gathered. */
const showNoTabsError = () => {
    showErrorToast({
        background: 'linear-gradient(135deg, #ff9500, #ff6b35)',
        heading: 'No Tabs Found',
        message: 'Unable to gather enough tab information. Please ensure the extension has permissions and that you have at least 3 tabs open.',
        durationMs: 5000,
    });
};

/** Paints `element`'s text with the given gradient (gradient background clipped to the glyphs). */
const applyTextGradient = (element: HTMLElement, gradient: string) => {
    element.style.background = gradient;
    element.style.backgroundClip = "text";
    element.style.webkitTextFillColor = "transparent";
};

/**
 * Adds a "Tabs" toggle to the Google search menu and mounts the space portal
 * in the page's app bar. The toggle shows or hides the portal.
 *
 * @returns The wrapper `div` inserted into the menu, or `null` when the menu
 *          cannot be found.
 */
const injectUI = async (space_id: string, onMessage: onMessageType, registerListeners: registerListenersType) => {
    const menu = document.querySelector("#hdtb-sc > div > div > div.crJ18e")?.children[0];

    if (!menu) {
        console.error('Could not find Google search menu');
        return null;
    }

    const div = document.createElement("div");
    const label = document.createElement("label");
    label.style.display = "inline-flex";
    label.style.alignItems = "center";
    label.style.cursor = "pointer";
    label.className = "nPDzT T3FoJb YmvwI";
    label.style.marginLeft = "8px";

    // Hidden checkbox: clicking the label toggles it and fires "change".
    const checkbox = document.createElement("input");
    checkbox.type = "checkbox";
    checkbox.style.display = "none";

    const textContainer = document.createElement("span");
    textContainer.innerText = "Tabs";
    applyTextGradient(textContainer, GRADIENT_IDLE);
    textContainer.style.fontWeight = "bold";

    await registerAuthCookies();
    const iframeScalerParent = await getSpacePortal(space_id, onMessage, registerListeners);

    checkbox.addEventListener("change", () => {
        iframeScalerParent.style.display = checkbox.checked ? "block" : "none";
        applyTextGradient(textContainer, checkbox.checked ? GRADIENT_ACTIVE : GRADIENT_IDLE);
    });

    label.appendChild(textContainer);
    label.appendChild(checkbox);
    div.appendChild(label);

    const appbar = document.querySelector("#appbar > div > div:nth-child(2)");
    if (appbar) {
        appbar.prepend(iframeScalerParent);
    } else {
        // The toggle is still inserted, but the portal has nowhere to live; make that visible in logs.
        console.warn('Could not find Google app bar; the Tabs space portal was not attached');
    }

    menu.insertBefore(div, menu.children[2]);
    return div;
}

export const ChromeTabsConnection: MantisConnection = {
    name: "Chrome Tabs",
    description: "Analyzes all your currently open browser tabs",
    icon: chromeIcon,
    trigger: trigger,
    createSpace: createSpace,
    injectUI: injectUI,
}