From 046f78ea603d01d1fa4428e38022defc88b619e1 Mon Sep 17 00:00:00 2001 From: Bram Adams <3282661+bramses@users.noreply.github.com> Date: Sun, 5 May 2024 09:02:51 -0400 Subject: [PATCH] Update .gitignore and fix filter condition in compile-quotes.js --- .DS_Store | Bin 10244 -> 10244 bytes .gitignore | 3 +- compile-quotes.js | 2 +- get-random-highlights-pl-spec.js | 83 +++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 get-random-highlights-pl-spec.js diff --git a/.DS_Store b/.DS_Store index f150822778bf4bb9229c41a45cd438f9b94e9b00..949da4b43d00bd04fa3befec86fbaf089938376f 100644 GIT binary patch delta 373 zcmZn(XbISmDqtwXkjRk3kj7BNkjjw4pvzDUq!WR>WQGhNFOeY+ESk)a4`h`9P4d<_5;KwVWKH%KFwp@!2`KdHG$F4J76Dy9qgmp|~u#C@&{J zFCD0oadVPDG?S$Q(8C}vmI1w31oWsb5a$EQ6rjIKfLNCy7bpW_Aw-H93K&u+Uyv4K pVv*ncKxh^VWBlZ5aanFzps9Qi&u)&E=waK;uJD^>vz9OuGXSe+UlITS delta 425 zcmZn(XbISmDzKSfD1wQTlOcs6k)ebkak7t$#N<~3f}4+vbhFg!GNdw8FcdK4GZXDllaq4tlNcBn1Q-|?XEQJ`J^c>`KoJH89)>)i$`YU+G<`YghQZ1CxdmWV5CW&# z+ { const assignments = kmeans( quotes .map((quote) => JSON.parse(quote.embedding)) - .filter((quote) => quote.length === 1536), + .filter((embedding) => embedding.length === 1536), k ); diff --git a/get-random-highlights-pl-spec.js b/get-random-highlights-pl-spec.js new file mode 100644 index 0000000..989cf86 --- /dev/null +++ b/get-random-highlights-pl-spec.js @@ -0,0 +1,83 @@ +// get random highlights of arg value number and put them in format { data , metadata } +import { fetchRandomHighlight } from "./get-random-highlight.js"; +import fs from "fs"; + +const convertToDataMetadata = async (amount, embeddings = false) => { + const highlights = await fetchRandomHighlight(amount, true); + const data = highlights.map((highlight) => highlight.text); + const metadata = highlights.map((highlight) => { + return { + title: highlight.book.title, + author: highlight.book.author, + book_id: highlight.book.book_id, + cover_image_url: highlight.book.cover_image_url, + readwise_url: highlight.readwise_url, + question: highlight.question, + thoughts: highlight.thoughts, + }; + }); + + if (embeddings) { + const embeddings = highlights.map((highlight) => highlight.embedding); + return data.map((data, index) => { + return { data: data, metadata: metadata[index], embedding: embeddings[index] }; + }); + } + + // zip data and metadata into an object and return it as an array + const result = data.map((data, index) => { + return { data: data, metadata: metadata[index] }; + }); + + return result; +}; + +// write to csv file with headers: data, metadata +const saveAsCSV = async (amount, embeddings = false) => { + const dataMetadata = await convertToDataMetadata(amount, embeddings); + + if (embeddings) { + const csv = dataMetadata.map((row) => { + const cleanData = row.data.replace(/,/g, ""); + const cleanMetadata = JSON.stringify(row.metadata).replace(/,/g, ""); + return `${cleanData},${JSON.stringify(cleanMetadata)},${row.embedding}`; + }); + + // add headers + csv.unshift("data,metadata,embedding"); + + const csvString = csv.join("\n"); + + fs.writeFile("highlights.csv", csvString, (err) => { + if (err) { + console.error(err); + return; + } + console.log("File has been created"); + }); + + return; + } + + const csv = dataMetadata.map((row) => { + const cleanData = row.data.replace(/,/g, ""); + const cleanMetadata = JSON.stringify(row.metadata).replace(/,/g, ""); + return `${cleanData},${JSON.stringify(cleanMetadata)}`; + }); + + // add headers + csv.unshift("data,metadata"); + + const csvString = csv.join("\n"); + + fs.writeFile("highlights.csv", csvString, (err) => { + if (err) { + console.error(err); + return; + } + console.log("File has been created"); + }); +}; + +const amount = 10; +saveAsCSV(amount);