diff --git a/.DS_Store b/.DS_Store index 8d4053d..f150822 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index e922e67..56d2912 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ amazon_links.json records*.json books-notion-table.csv kindle-highlights -kindle-export-supabase.js \ No newline at end of file +kindle-export-supabase.js +ocr \ No newline at end of file diff --git a/compile-quotes.js b/compile-quotes.js index 645fb8b..dff0aab 100644 --- a/compile-quotes.js +++ b/compile-quotes.js @@ -9,6 +9,8 @@ import dotenv from "dotenv"; import fs from "fs"; import similarity from "compute-cosine-similarity"; import { Configuration, OpenAIApi } from "openai"; +import * as math from "mathjs"; +import { kmeans } from "ml-kmeans"; dotenv.config(); @@ -21,8 +23,8 @@ const openai = new OpenAIApi(configuration); dotenv.config(); -const CLUSTER_THRESHOLD = 0.8; -const BOOK_ID = "38318378"; +const CLUSTER_THRESHOLD = 0.78; +const BOOK_ID = "38531384"; const supabaseUrl = process.env.SUPABASE_URL; const supabaseKey = process.env.SUPABASE_KEY; @@ -32,6 +34,44 @@ const supabase = createClient(supabaseUrl, supabaseKey, { }, }); +// Define the k-means clustering algorithm +function kMeansLocal(data, k) { + // Initialize the centroids + const centroids = []; + let prevCentroids = []; + for (let i = 0; i < k; i++) { + centroids.push(data[Math.floor(Math.random() * data.length)]); + } + + // Assign each data point to the closest centroid + const assignments = []; + for (let i = 0; i < data.length; i++) { + const distances = []; + for (let j = 0; j < centroids.length; j++) { + distances.push(math.distance(data[i], centroids[j])); + } + assignments.push(distances.indexOf(Math.min(...distances))); + } + + // Update the centroids + for (let i = 0; i < k; i++) { + const cluster = data.filter((d, j) => assignments[j] === i); + if (cluster.length === 0) { + continue; + } + centroids[i] = math.mean(cluster, 0); + } + + // Repeat until the centroids no longer change + if (!math.deepEqual(centroids, prevCentroids)) { + prevCentroids = centroids; + return kMeans(data, k); + } + + // Return the assignments + return assignments; +} + const getQuotes = async (bookId) => { const { data, error } = await supabase .from("highlights") @@ -120,7 +160,7 @@ function clusterEmbeddings(quotes, threshold = 0.01) { const assignTopicToCluster = async (cluster) => { try { - const prompt = `Given the following quotes, what is a good topic for them? Return only the topic as a Markdown heading with no leading #`; + const prompt = `Given the following quotes, what is a good topic for them? Return only the topic as a Markdown heading with no leading #. No bold (**) or italics (*) are needed.`; const completion = await openai.createChatCompletion({ messages: [ @@ -154,34 +194,133 @@ const assignTopicToCluster = async (cluster) => { } }; +const summarizeCluster = async (cluster) => { + try { + const prompt = `Given the following quotes, summarize them into a two-three sentence summary. Return only the summary`; + + const completion = await openai.createChatCompletion({ + messages: [ + { + role: "system", + content: prompt, + }, + { + role: "user", + content: cluster.map((quote) => quote.text).join("\n"), + }, + ], + model: "gpt-3.5-turbo", + }); + + const content = completion.data.choices[0].message.content; + + return content.trim(); + } catch (err) { + console.log("START ERROR"); + console.error(err); + console.error(err.response); + console.error(err.response.data); + console.error(err.response.data.error); + console.error(err.response.data.error.message); + console.error(err.response.data.error.code); + console.error(err.response.data.error.status); + console.error(err.response.data.error.request); + console.log("END ERROR"); + throw err; + } +}; + +const followUpQuestions = async (cluster) => { + try { + const prompt = `Given the following quotes, what are some follow up questions you could ask about them? Return only the questions as a bulleted list`; + + const completion = await openai.createChatCompletion({ + messages: [ + { + role: "system", + content: prompt, + }, + { + role: "user", + content: cluster.map((quote) => quote.text).join("\n"), + }, + ], + model: "gpt-3.5-turbo", + }); + + const content = completion.data.choices[0].message.content; + + return content.trim(); + } catch (err) { + console.log("START ERROR"); + console.error(err); + console.error(err.response); + console.error(err.response.data); + console.error(err.response.data.error); + console.error(err.response.data.error.message); + console.error(err.response.data.error.code); + console.error(err.response.data.error.status); + console.error(err.response.data.error.request); + console.log("END ERROR"); + throw err; + } +}; + const main = async () => { const quotes = await compileQuotesFomID(BOOK_ID); console.log(quotes.length + " quotes found."); - let clusteredQuotes = clusterEmbeddings(quotes, CLUSTER_THRESHOLD); + // let clusteredQuotes = clusterEmbeddings(quotes, CLUSTER_THRESHOLD); + + // Example usage + // const data = [ + // [1, 2], + // [3, 4], + // [5, 6], + // [7, 8], + // ]; + const k = 5; + const assignments = kmeans( + quotes + .map((quote) => JSON.parse(quote.embedding)) + .filter((quote) => quote.length === 1536), + k + ); + + // console.log(assignments); // convert each cluster into a heading and a list of quotes in markdown under it and write to a file // use cluster index as heading // each quote is a bullet point under the heading - let markdown = ""; - let tableOfContents = ""; - for (let index = 0; index < clusteredQuotes.length; index++) { - const cluster = clusteredQuotes[index]; - const topic = await assignTopicToCluster(cluster); - console.log("Cluster " + index + ": " + topic); - tableOfContents += `${index + 1}. [Cluster ${index} - ${topic}](#cluster-${index}---${topic})\n`; - markdown += `## Cluster ${index} - ${topic}\n`; - cluster.forEach((quote) => { - markdown += `- ${quote.text}\n`; - }); - } - markdown = `# Table of Contents\n${tableOfContents}\n${markdown}`; + const clusters = []; + for (let i = 0; i < k; i++) { + clusters.push([]); + } + for (let i = 0; i < assignments.clusters.length; i++) { + clusters[assignments.clusters[i]].push(quotes[i]); + } + for (let i = 0; i < clusters.length; i++) { + const cluster = clusters[i]; + console.log(`Cluster ${i} has ${cluster.length} quotes.`); + const topic = await assignTopicToCluster(cluster); + const summary = await summarizeCluster(cluster); + // const followUp = await followUpQuestions(cluster); + + markdown += `## ${topic}\n\n`; + markdown += `### Summary\n\n${summary}\n\n`; + markdown += `### Quotes\n\n`; + cluster.forEach((quote) => { + markdown += `- ${quote.text}\n`; + }); + markdown += `\n\n`; + // markdown += `### Follow Up Questions\n\n${followUp}\n\n`; + } - fs.writeFileSync("quotes.md", markdown); + fs.writeFileSync("output.md", markdown); }; // main(); diff --git a/package-lock.json b/package-lock.json index 3058284..10aec62 100644 --- a/package-lock.json +++ b/package-lock.json @@ -23,6 +23,8 @@ "heic-convert": "^1.2.4", "jimp": "^0.22.10", "jsdom": "^24.0.0", + "mathjs": "^12.4.1", + "ml-kmeans": "^6.0.0", "node-fetch": "^3.3.2", "openai": "^3.3.0", "supabase": "^1.77.9", @@ -33,6 +35,22 @@ "node": ">=19.5.0" } }, + "node_modules/@babel/runtime": { + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.24.1.tgz", + "integrity": "sha512-+BIznRzyqBf+2wCTxcKE3wDjfGeCoVE61KSHGpkzqrLi8qxqFwBeUFyId2cxkTmm55fzDGnm0+yCxaxygrLUnQ==", + "dependencies": { + "regenerator-runtime": "^0.14.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/runtime/node_modules/regenerator-runtime": { + "version": "0.14.1", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz", + "integrity": "sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==" + }, "node_modules/@jimp/bmp": { "version": "0.22.10", "resolved": "https://registry.npmjs.org/@jimp/bmp/-/bmp-0.22.10.tgz", @@ -877,6 +895,18 @@ "node": ">= 0.8" } }, + "node_modules/complex.js": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/complex.js/-/complex.js-2.1.1.tgz", + "integrity": "sha512-8njCHOTtFFLtegk6zQo0kkVX1rngygb/KQI6z1qZxlFI3scluC+LVTCFbrkWjBv4vvLlbQ9t88IPMC6k95VTTg==", + "engines": { + "node": "*" + }, + "funding": { + "type": "patreon", + "url": "https://www.patreon.com/infusion" + } + }, "node_modules/compute-cosine-similarity": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/compute-cosine-similarity/-/compute-cosine-similarity-1.1.0.tgz", @@ -1212,6 +1242,11 @@ "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==" }, + "node_modules/escape-latex": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/escape-latex/-/escape-latex-1.2.0.tgz", + "integrity": "sha512-nV5aVWW1K0wEiUIEdZ4erkGGH8mDxGyxSeqPzRNtWP7ataw+/olFObw7hujFWlVjNsaDFw5VZ5NzVSIqRgfTiw==" + }, "node_modules/etag": { "version": "1.8.1", "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", @@ -1449,6 +1484,18 @@ "node": ">= 0.6" } }, + "node_modules/fraction.js": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.4.tgz", + "integrity": "sha512-pwiTgt0Q7t+GHZA4yaLjObx4vXmmdcS0iSJ19o8d/goUGgItX9UZWKWNnLHehxviD8wU2IWRsnR8cD5+yOJP2Q==", + "engines": { + "node": "*" + }, + "funding": { + "type": "patreon", + "url": "https://github.com/sponsors/rawify" + } + }, "node_modules/fresh": { "version": "0.5.2", "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", @@ -1829,6 +1876,11 @@ "node": ">= 0.10" } }, + "node_modules/is-any-array": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz", + "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==" + }, "node_modules/is-docker": { "version": "2.2.1", "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz", @@ -1921,6 +1973,11 @@ } } }, + "node_modules/javascript-natural-sort": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/javascript-natural-sort/-/javascript-natural-sort-0.7.1.tgz", + "integrity": "sha512-nO6jcEfZWQXDhOiBtG2KvKyEptz7RVbpGP4vTD2hLBdmNQSsCiicO2Ioinv6UI4y9ukqnBpy+XZ9H6uLNgJTlw==" + }, "node_modules/jimp": { "version": "0.22.10", "resolved": "https://registry.npmjs.org/jimp/-/jimp-0.22.10.tgz", @@ -2107,6 +2164,28 @@ "semver": "bin/semver.js" } }, + "node_modules/mathjs": { + "version": "12.4.1", + "resolved": "https://registry.npmjs.org/mathjs/-/mathjs-12.4.1.tgz", + "integrity": "sha512-welnW3khgwYjPYvECFHO+xkCxAx9IKIIPDDWPi8B5rKAvmgoEHnQX9slEmHKZTNaJiE+OS4qrJJcB4sfDn/4sw==", + "dependencies": { + "@babel/runtime": "^7.24.0", + "complex.js": "^2.1.1", + "decimal.js": "^10.4.3", + "escape-latex": "^1.2.0", + "fraction.js": "4.3.4", + "javascript-natural-sort": "^0.7.1", + "seedrandom": "^3.0.5", + "tiny-emitter": "^2.1.0", + "typed-function": "^4.1.1" + }, + "bin": { + "mathjs": "bin/cli.js" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/media-typer": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", @@ -2243,6 +2322,78 @@ "node": ">=10" } }, + "node_modules/ml-array-max": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/ml-array-max/-/ml-array-max-1.2.4.tgz", + "integrity": "sha512-BlEeg80jI0tW6WaPyGxf5Sa4sqvcyY6lbSn5Vcv44lp1I2GR6AWojfUvLnGTNsIXrZ8uqWmo8VcG1WpkI2ONMQ==", + "dependencies": { + "is-any-array": "^2.0.0" + } + }, + "node_modules/ml-array-min": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/ml-array-min/-/ml-array-min-1.2.3.tgz", + "integrity": "sha512-VcZ5f3VZ1iihtrGvgfh/q0XlMobG6GQ8FsNyQXD3T+IlstDv85g8kfV0xUG1QPRO/t21aukaJowDzMTc7j5V6Q==", + "dependencies": { + "is-any-array": "^2.0.0" + } + }, + "node_modules/ml-array-rescale": { + "version": "1.3.7", + "resolved": "https://registry.npmjs.org/ml-array-rescale/-/ml-array-rescale-1.3.7.tgz", + "integrity": "sha512-48NGChTouvEo9KBctDfHC3udWnQKNKEWN0ziELvY3KG25GR5cA8K8wNVzracsqSW1QEkAXjTNx+ycgAv06/1mQ==", + "dependencies": { + "is-any-array": "^2.0.0", + "ml-array-max": "^1.2.4", + "ml-array-min": "^1.2.3" + } + }, + "node_modules/ml-distance-euclidean": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ml-distance-euclidean/-/ml-distance-euclidean-2.0.0.tgz", + "integrity": "sha512-yC9/2o8QF0A3m/0IXqCTXCzz2pNEzvmcE/9HFKOZGnTjatvBbsn4lWYJkxENkA4Ug2fnYl7PXQxnPi21sgMy/Q==" + }, + "node_modules/ml-kmeans": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/ml-kmeans/-/ml-kmeans-6.0.0.tgz", + "integrity": "sha512-aziEZqeHxczaDvo1qkfCrC7XNVAPevs6PigAzy7dp9TzeQI7oGan6NfCgADwL/FAlA/wWi+1DkV8da6pXfuuPg==", + "dependencies": { + "ml-distance-euclidean": "^2.0.0", + "ml-matrix": "^6.9.0", + "ml-nearest-vector": "^2.0.1", + "ml-random": "^0.5.0" + } + }, + "node_modules/ml-matrix": { + "version": "6.11.0", + "resolved": "https://registry.npmjs.org/ml-matrix/-/ml-matrix-6.11.0.tgz", + "integrity": "sha512-7jr9NmFRkaUxbKslfRu3aZOjJd2LkSitCGv+QH9PF0eJoEG7jIpjXra1Vw8/kgao8+kHCSsJONG6vfWmXQ+/Eg==", + "dependencies": { + "is-any-array": "^2.0.1", + "ml-array-rescale": "^1.3.7" + } + }, + "node_modules/ml-nearest-vector": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/ml-nearest-vector/-/ml-nearest-vector-2.0.1.tgz", + "integrity": "sha512-gMPwNm3eed59ewJYiCK/+wElWBfNoD6JizH965ePiQgCo0pvQL63w4YdZhLs5eUV0iWcq6brVMUBL6iMySHnqg==", + "dependencies": { + "ml-distance-euclidean": "^2.0.0" + } + }, + "node_modules/ml-random": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/ml-random/-/ml-random-0.5.0.tgz", + "integrity": "sha512-zLJBmNb34LOz+vN6BD8l3aYm/VWYWbmAunrLMPs4dHf4gTl8BWlhil72j56HubPg86zrXioIs4qoHq7Topy6tw==", + "dependencies": { + "ml-xsadd": "^2.0.0" + } + }, + "node_modules/ml-xsadd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ml-xsadd/-/ml-xsadd-2.0.0.tgz", + "integrity": "sha512-VoAYUqmPRmzKbbqRejjqceGFp3VF81Qe8XXFGU0UXLxB7Mf4GGvyGq5Qn3k4AiQgDEV6WzobqlPOd+j0+m6IrA==" + }, "node_modules/ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", @@ -2740,6 +2891,11 @@ "node": ">=v12.22.7" } }, + "node_modules/seedrandom": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/seedrandom/-/seedrandom-3.0.5.tgz", + "integrity": "sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg==" + }, "node_modules/semver": { "version": "7.5.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", @@ -2985,6 +3141,11 @@ "resolved": "https://registry.npmjs.org/timm/-/timm-1.7.1.tgz", "integrity": "sha512-IjZc9KIotudix8bMaBW6QvMuq64BrJWFs1+4V0lXwWGQZwH+LnX87doAYhem4caOEusRP9/g6jVDQmZ8XOk1nw==" }, + "node_modules/tiny-emitter": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/tiny-emitter/-/tiny-emitter-2.1.0.tgz", + "integrity": "sha512-NB6Dk1A9xgQPMoGqC5CVXn123gWyte215ONT5Pp5a0yt4nlEoO1ZWeCwpncaekPHXO60i47ihFnZPiRPjRMq4Q==" + }, "node_modules/tinycolor2": { "version": "1.6.0", "resolved": "https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz", @@ -3050,6 +3211,14 @@ "node": ">= 0.6" } }, + "node_modules/typed-function": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/typed-function/-/typed-function-4.1.1.tgz", + "integrity": "sha512-Pq1DVubcvibmm8bYcMowjVnnMwPVMeh0DIdA8ad8NZY2sJgapANJmiigSUwlt+EgXxpfIv8MWrQXTIzkfYZLYQ==", + "engines": { + "node": ">= 14" + } + }, "node_modules/typedarray-to-buffer": { "version": "3.1.5", "resolved": "https://registry.npmjs.org/typedarray-to-buffer/-/typedarray-to-buffer-3.1.5.tgz", diff --git a/package.json b/package.json index 3521143..49e1ece 100644 --- a/package.json +++ b/package.json @@ -29,6 +29,8 @@ "heic-convert": "^1.2.4", "jimp": "^0.22.10", "jsdom": "^24.0.0", + "mathjs": "^12.4.1", + "ml-kmeans": "^6.0.0", "node-fetch": "^3.3.2", "openai": "^3.3.0", "supabase": "^1.77.9", diff --git a/poetry.lock b/poetry.lock index 08dde5d..d7af8dc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1179,6 +1179,92 @@ sql-other = ["SQLAlchemy (>=1.4.16)"] test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.6.3)"] +[[package]] +name = "pillow" +version = "10.2.0" +description = "Python Imaging Library (Fork)" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pillow-10.2.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:7823bdd049099efa16e4246bdf15e5a13dbb18a51b68fa06d6c1d4d8b99a796e"}, + {file = "pillow-10.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:83b2021f2ade7d1ed556bc50a399127d7fb245e725aa0113ebd05cfe88aaf588"}, + {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fad5ff2f13d69b7e74ce5b4ecd12cc0ec530fcee76356cac6742785ff71c452"}, + {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da2b52b37dad6d9ec64e653637a096905b258d2fc2b984c41ae7d08b938a67e4"}, + {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:47c0995fc4e7f79b5cfcab1fc437ff2890b770440f7696a3ba065ee0fd496563"}, + {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:322bdf3c9b556e9ffb18f93462e5f749d3444ce081290352c6070d014c93feb2"}, + {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:51f1a1bffc50e2e9492e87d8e09a17c5eea8409cda8d3f277eb6edc82813c17c"}, + {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:69ffdd6120a4737710a9eee73e1d2e37db89b620f702754b8f6e62594471dee0"}, + {file = "pillow-10.2.0-cp310-cp310-win32.whl", hash = "sha256:c6dafac9e0f2b3c78df97e79af707cdc5ef8e88208d686a4847bab8266870023"}, + {file = "pillow-10.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:aebb6044806f2e16ecc07b2a2637ee1ef67a11840a66752751714a0d924adf72"}, + {file = "pillow-10.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:7049e301399273a0136ff39b84c3678e314f2158f50f517bc50285fb5ec847ad"}, + {file = "pillow-10.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:35bb52c37f256f662abdfa49d2dfa6ce5d93281d323a9af377a120e89a9eafb5"}, + {file = "pillow-10.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c23f307202661071d94b5e384e1e1dc7dfb972a28a2310e4ee16103e66ddb67"}, + {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:773efe0603db30c281521a7c0214cad7836c03b8ccff897beae9b47c0b657d61"}, + {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11fa2e5984b949b0dd6d7a94d967743d87c577ff0b83392f17cb3990d0d2fd6e"}, + {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:716d30ed977be8b37d3ef185fecb9e5a1d62d110dfbdcd1e2a122ab46fddb03f"}, + {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a086c2af425c5f62a65e12fbf385f7c9fcb8f107d0849dba5839461a129cf311"}, + {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c8de2789052ed501dd829e9cae8d3dcce7acb4777ea4a479c14521c942d395b1"}, + {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:609448742444d9290fd687940ac0b57fb35e6fd92bdb65386e08e99af60bf757"}, + {file = "pillow-10.2.0-cp311-cp311-win32.whl", hash = "sha256:823ef7a27cf86df6597fa0671066c1b596f69eba53efa3d1e1cb8b30f3533068"}, + {file = "pillow-10.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:1da3b2703afd040cf65ec97efea81cfba59cdbed9c11d8efc5ab09df9509fc56"}, + {file = "pillow-10.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:edca80cbfb2b68d7b56930b84a0e45ae1694aeba0541f798e908a49d66b837f1"}, + {file = "pillow-10.2.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:1b5e1b74d1bd1b78bc3477528919414874748dd363e6272efd5abf7654e68bef"}, + {file = "pillow-10.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0eae2073305f451d8ecacb5474997c08569fb4eb4ac231ffa4ad7d342fdc25ac"}, + {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7c2286c23cd350b80d2fc9d424fc797575fb16f854b831d16fd47ceec078f2c"}, + {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e23412b5c41e58cec602f1135c57dfcf15482013ce6e5f093a86db69646a5aa"}, + {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:52a50aa3fb3acb9cf7213573ef55d31d6eca37f5709c69e6858fe3bc04a5c2a2"}, + {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:127cee571038f252a552760076407f9cff79761c3d436a12af6000cd182a9d04"}, + {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8d12251f02d69d8310b046e82572ed486685c38f02176bd08baf216746eb947f"}, + {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:54f1852cd531aa981bc0965b7d609f5f6cc8ce8c41b1139f6ed6b3c54ab82bfb"}, + {file = "pillow-10.2.0-cp312-cp312-win32.whl", hash = "sha256:257d8788df5ca62c980314053197f4d46eefedf4e6175bc9412f14412ec4ea2f"}, + {file = "pillow-10.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:154e939c5f0053a383de4fd3d3da48d9427a7e985f58af8e94d0b3c9fcfcf4f9"}, + {file = "pillow-10.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:f379abd2f1e3dddb2b61bc67977a6b5a0a3f7485538bcc6f39ec76163891ee48"}, + {file = "pillow-10.2.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8373c6c251f7ef8bda6675dd6d2b3a0fcc31edf1201266b5cf608b62a37407f9"}, + {file = "pillow-10.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:870ea1ada0899fd0b79643990809323b389d4d1d46c192f97342eeb6ee0b8483"}, + {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4b6b1e20608493548b1f32bce8cca185bf0480983890403d3b8753e44077129"}, + {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3031709084b6e7852d00479fd1d310b07d0ba82765f973b543c8af5061cf990e"}, + {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:3ff074fc97dd4e80543a3e91f69d58889baf2002b6be64347ea8cf5533188213"}, + {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:cb4c38abeef13c61d6916f264d4845fab99d7b711be96c326b84df9e3e0ff62d"}, + {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b1b3020d90c2d8e1dae29cf3ce54f8094f7938460fb5ce8bc5c01450b01fbaf6"}, + {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:170aeb00224ab3dc54230c797f8404507240dd868cf52066f66a41b33169bdbe"}, + {file = "pillow-10.2.0-cp38-cp38-win32.whl", hash = "sha256:c4225f5220f46b2fde568c74fca27ae9771536c2e29d7c04f4fb62c83275ac4e"}, + {file = "pillow-10.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:0689b5a8c5288bc0504d9fcee48f61a6a586b9b98514d7d29b840143d6734f39"}, + {file = "pillow-10.2.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:b792a349405fbc0163190fde0dc7b3fef3c9268292586cf5645598b48e63dc67"}, + {file = "pillow-10.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c570f24be1e468e3f0ce7ef56a89a60f0e05b30a3669a459e419c6eac2c35364"}, + {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8ecd059fdaf60c1963c58ceb8997b32e9dc1b911f5da5307aab614f1ce5c2fb"}, + {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c365fd1703040de1ec284b176d6af5abe21b427cb3a5ff68e0759e1e313a5e7e"}, + {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:70c61d4c475835a19b3a5aa42492409878bbca7438554a1f89d20d58a7c75c01"}, + {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b6f491cdf80ae540738859d9766783e3b3c8e5bd37f5dfa0b76abdecc5081f13"}, + {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d189550615b4948f45252d7f005e53c2040cea1af5b60d6f79491a6e147eef7"}, + {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:49d9ba1ed0ef3e061088cd1e7538a0759aab559e2e0a80a36f9fd9d8c0c21591"}, + {file = "pillow-10.2.0-cp39-cp39-win32.whl", hash = "sha256:babf5acfede515f176833ed6028754cbcd0d206f7f614ea3447d67c33be12516"}, + {file = "pillow-10.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:0304004f8067386b477d20a518b50f3fa658a28d44e4116970abfcd94fac34a8"}, + {file = "pillow-10.2.0-cp39-cp39-win_arm64.whl", hash = "sha256:0fb3e7fc88a14eacd303e90481ad983fd5b69c761e9e6ef94c983f91025da869"}, + {file = "pillow-10.2.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:322209c642aabdd6207517e9739c704dc9f9db943015535783239022002f054a"}, + {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3eedd52442c0a5ff4f887fab0c1c0bb164d8635b32c894bc1faf4c618dd89df2"}, + {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb28c753fd5eb3dd859b4ee95de66cc62af91bcff5db5f2571d32a520baf1f04"}, + {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:33870dc4653c5017bf4c8873e5488d8f8d5f8935e2f1fb9a2208c47cdd66efd2"}, + {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3c31822339516fb3c82d03f30e22b1d038da87ef27b6a78c9549888f8ceda39a"}, + {file = "pillow-10.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a2b56ba36e05f973d450582fb015594aaa78834fefe8dfb8fcd79b93e64ba4c6"}, + {file = "pillow-10.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d8e6aeb9201e655354b3ad049cb77d19813ad4ece0df1249d3c793de3774f8c7"}, + {file = "pillow-10.2.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:2247178effb34a77c11c0e8ac355c7a741ceca0a732b27bf11e747bbc950722f"}, + {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15587643b9e5eb26c48e49a7b33659790d28f190fc514a322d55da2fb5c2950e"}, + {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753cd8f2086b2b80180d9b3010dd4ed147efc167c90d3bf593fe2af21265e5a5"}, + {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7c8f97e8e7a9009bcacbe3766a36175056c12f9a44e6e6f2d5caad06dcfbf03b"}, + {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d1b35bcd6c5543b9cb547dee3150c93008f8dd0f1fef78fc0cd2b141c5baf58a"}, + {file = "pillow-10.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe4c15f6c9285dc54ce6553a3ce908ed37c8f3825b5a51a15c91442bb955b868"}, + {file = "pillow-10.2.0.tar.gz", hash = "sha256:e87f0b2c78157e12d7686b27d63c070fd65d994e8ddae6f328e0dcf4a0cd007e"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + [[package]] name = "pkginfo" version = "1.9.6" @@ -1343,6 +1429,22 @@ files = [ {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, ] +[[package]] +name = "pytesseract" +version = "0.3.10" +description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytesseract-0.3.10-py3-none-any.whl", hash = "sha256:8f22cc98f765bf13517ead0c70effedb46c153540d25783e04014f28b55a5fc6"}, + {file = "pytesseract-0.3.10.tar.gz", hash = "sha256:f1c3a8b0f07fd01a1085d451f5b8315be6eec1d5577a6796d46dc7a62bd4120f"}, +] + +[package.dependencies] +packaging = ">=21.3" +Pillow = ">=8.0.0" + [[package]] name = "python-dateutil" version = "2.8.2" @@ -2134,4 +2236,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "15966c065bb097a315b5cbf633709700ada5d4db7ca969b212cb75c4a919477d" +content-hash = "6fc557a579eaf7fa98d617c424c05dae9f28f4a6e0b7b0a206ff8337c6e12d18" diff --git a/pyproject.toml b/pyproject.toml index 29aee20..7a425d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ nomic = "^2.0.5" supabase = "^1.0.3" bs4 = "^0.0.1" selenium = "^4.12.0" +pytesseract = "^0.3.10" [build-system]