Skip to content

Commit

Permalink
fix: reenable CI
Browse files (browse the repository at this point in the history)
Change-Id: I6661ff6e89ed48068baaabc4d0e4e36ad85bbd3e
  • Loading branch information
Gen Lu committed Sep 19, 2024
1 parent 6e48fd5 commit 9c4f119
Show file tree
Hide file tree
Showing 4 changed files with 416 additions and 418 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,9 +37,9 @@
"id": "c7ff518d-f4d2-481b-b408-2c2507565611",
"metadata": {},
"source": [
"## Creating the Database Connection\n",
"## Download the Data\n",
"\n",
"Let's now set up a connection to your CloudSQL database:"
"Let's now import required modules:"
]
},
{
Expand All @@ -60,42 +60,7 @@
"from datasets import load_dataset_builder, load_dataset, Dataset\n",
"from huggingface_hub import snapshot_download\n",
"from google.cloud.sql.connector import Connector, IPTypes\n",
"import sqlalchemy\n",
"\n",
"# initialize parameters\n",
"\n",
"INSTANCE_CONNECTION_NAME = os.environ[\"CLOUDSQL_INSTANCE_CONNECTION_NAME\"]\n",
"print(f\"Your instance connection name is: {INSTANCE_CONNECTION_NAME}\")\n",
"DB_NAME = \"pgvector-database\"\n",
"\n",
"db_username_file = open(\"/etc/secret-volume/username\", \"r\")\n",
"DB_USER = db_username_file.read()\n",
"db_username_file.close()\n",
"\n",
"db_password_file = open(\"/etc/secret-volume/password\", \"r\")\n",
"DB_PASS = db_password_file.read()\n",
"db_password_file.close()\n",
"\n",
"# initialize Connector object\n",
"connector = Connector()\n",
"\n",
"# function to return the database connection object\n",
"def getconn():\n",
" conn = connector.connect(\n",
" INSTANCE_CONNECTION_NAME,\n",
" \"pg8000\",\n",
" user=DB_USER,\n",
" password=DB_PASS,\n",
" db=DB_NAME,\n",
" ip_type=IPTypes.PRIVATE\n",
" )\n",
" return conn\n",
"\n",
"# create connection pool with 'creator' argument to our connection object function\n",
"pool = sqlalchemy.create_engine(\n",
" \"postgresql+pg8000://\",\n",
" creator=getconn,\n",
")"
"import sqlalchemy"
]
},
{
Expand Down Expand Up @@ -123,7 +88,7 @@
"SHARED_DATASET_BASE_PATH=\"/data/netflix-shows/\"\n",
"REVIEWS_FILE_NAME=\"netflix_titles.csv\"\n",
"\n",
"BATCH_SIZE = 100\n",
"BATCH_SIZE = 500\n",
"CHUNK_SIZE = 1000 # text chunk sizes which will be converted to vector embeddings\n",
"CHUNK_OVERLAP = 10\n",
"TABLE_NAME = 'netflix_reviews_db' # CloudSQL table name\n",
Expand Down Expand Up @@ -322,6 +287,40 @@
"from sqlalchemy.orm import scoped_session, sessionmaker, mapped_column\n",
"from pgvector.sqlalchemy import Vector\n",
"\n",
"# initialize parameters\n",
"\n",
"INSTANCE_CONNECTION_NAME = os.environ[\"CLOUDSQL_INSTANCE_CONNECTION_NAME\"]\n",
"print(f\"Your instance connection name is: {INSTANCE_CONNECTION_NAME}\")\n",
"DB_NAME = \"pgvector-database\"\n",
"\n",
"db_username_file = open(\"/etc/secret-volume/username\", \"r\")\n",
"DB_USER = db_username_file.read()\n",
"db_username_file.close()\n",
"\n",
"db_password_file = open(\"/etc/secret-volume/password\", \"r\")\n",
"DB_PASS = db_password_file.read()\n",
"db_password_file.close()\n",
"\n",
"# initialize Connector object\n",
"connector = Connector()\n",
"\n",
"# function to return the database connection object\n",
"def getconn():\n",
" conn = connector.connect(\n",
" INSTANCE_CONNECTION_NAME,\n",
" \"pg8000\",\n",
" user=DB_USER,\n",
" password=DB_PASS,\n",
" db=DB_NAME,\n",
" ip_type=IPTypes.PRIVATE\n",
" )\n",
" return conn\n",
"\n",
"# create connection pool with 'creator' argument to our connection object function\n",
"pool = sqlalchemy.create_engine(\n",
" \"postgresql+pg8000://\",\n",
" creator=getconn,\n",
")\n",
"\n",
"Base = declarative_base()\n",
"DBSession = scoped_session(sessionmaker())\n",
Expand Down
Loading

0 comments on commit 9c4f119

Please sign in to comment.