Skip to content

Commit

Permalink
Replace pygresql with psycopg2 for gpMgmt tools. (#15988)
Browse files Browse the repository at this point in the history
This is the last patch for replacing pygresql with psycopg2 in Greenplum. This patch mainly targets the gpMgmt tools.

Benefits of replacing pygresql with psycopg2:
- Psycopg2 is actively maintained, whereas with pygresql we have encountered bugs that haven't been fixed upstream yet, e.g., https://github.com/greenplum-db/gpdb/pull/13953.
- Psycopg2 is provided by Rocky Linux and Ubuntu. That is to say, we don't need to vendor it ourselves.
- Last but not least, we got a chance to clean up legacy code during the removal process, e.g., https://github.com/greenplum-db/gpdb/pull/15983.

After this patch, we need to do the following things.
- Add psycopg2 as a dependency of the rpm/deb package.
- Remove the pygresql source code tarball from the gpdb repo.
- Tidy up READMEs and requirements.txt files.

---------

Co-authored-by: Chen Mulong <[email protected]>
Co-authored-by: Xiaoxiao He <[email protected]>
Co-authored-by: zhrt123 <[email protected]>
Co-authored-by: Piyush Chandwadkar <[email protected]>
Co-authored-by: Praveen Kumar <[email protected]>
  • Loading branch information
6 people authored and reshke committed Sep 16, 2024
1 parent 5a9f373 commit 5e25156
Show file tree
Hide file tree
Showing 39 changed files with 815 additions and 604 deletions.
17 changes: 8 additions & 9 deletions gpMgmt/bin/analyzedb
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,15 @@ from contextlib import closing
import pipes # for shell-quoting, pipes.quote()
import fcntl
import itertools

import psycopg2
try:
import pg

from gppylib import gplog, pgconf, userinput
from gppylib.commands.base import Command, WorkerPool, Worker
from gppylib.operations import Operation
from gppylib.gpversion import GpVersion
from gppylib.db import dbconn
from gppylib.operations.unix import CheckDir, CheckFile, MakeDir
from gppylib.utils import escape_string

except ImportError as e:
sys.exit('Cannot import modules. Please check that you have sourced greenplum_path.sh. Detail: ' + str(e))
Expand Down Expand Up @@ -166,7 +165,7 @@ def validate_schema_exists(pg_port, dbname, schema):
try:
dburl = dbconn.DbURL(port=pg_port, dbname=dbname)
conn = dbconn.connect(dburl)
count = dbconn.querySingleton(conn, "select count(*) from pg_namespace where nspname='%s';" % pg.escape_string(schema))
count = dbconn.querySingleton(conn, "select count(*) from pg_namespace where nspname='%s';" % escape_string(schema))
if count == 0:
raise ExceptionNoStackTraceNeeded("Schema %s does not exist in database %s." % (schema, dbname))
finally:
Expand Down Expand Up @@ -213,7 +212,7 @@ def get_partition_state_tuples(pg_port, dbname, catalog_schema, partition_info):
try:
modcount_sql = "select to_char(coalesce(sum(modcount::bigint), 0), '999999999999999999999') from gp_dist_random('%s.%s')" % (catalog_schema, tupletable)
modcount = dbconn.querySingleton(conn, modcount_sql)
except pg.DatabaseError as e:
except psycopg2.DatabaseError as e:
if "does not exist" in str(e):
logger.info("Table %s.%s (%s) no longer exists and will not be analyzed", schemaname, partition_name, tupletable)
else:
Expand Down Expand Up @@ -971,7 +970,7 @@ def get_oid_str(table_list):
def regclass_schema_tbl(schema, tbl):
schema_tbl = "%s.%s" % (escape_identifier(schema), escape_identifier(tbl))

return "to_regclass('%s')" % (pg.escape_string(schema_tbl))
return "to_regclass('%s')" % (escape_string(schema_tbl))


# Escape double-quotes in a string, so that the resulting string is suitable for
Expand Down Expand Up @@ -1239,7 +1238,7 @@ def validate_tables(conn, tablenames):
while curr_batch < nbatches:
batch = tablenames[curr_batch * batch_size:(curr_batch + 1) * batch_size]

oid_str = ','.join(map((lambda x: "('%s')" % pg.escape_string(x)), batch))
oid_str = ','.join(map((lambda x: "('%s')" % escape_string(x)), batch))
if not oid_str:
break

Expand All @@ -1255,7 +1254,7 @@ def get_include_cols_from_exclude(conn, schema, table, exclude_cols):
"""
Given a list of excluded columns of a table, get the list of included columns
"""
quoted_exclude_cols = ','.join(["'%s'" % pg.escape_string(x) for x in exclude_cols])
quoted_exclude_cols = ','.join(["'%s'" % escape_string(x) for x in exclude_cols])

oid_str = regclass_schema_tbl(schema, table)
cols = run_sql(conn, GET_INCLUDED_COLUMNS_FROM_EXCLUDE_SQL % (oid_str, quoted_exclude_cols))
Expand All @@ -1271,7 +1270,7 @@ def validate_columns(conn, schema, table, column_list):
return

sql = VALIDATE_COLUMN_NAMES_SQL % (regclass_schema_tbl(schema, table),
','.join(["'%s'" % pg.escape_string(x) for x in column_list]))
','.join(["'%s'" % escape_string(x) for x in column_list]))
valid_col_count = dbconn.querySingleton(conn, sql)

if int(valid_col_count) != len(column_list):
Expand Down
5 changes: 2 additions & 3 deletions gpMgmt/bin/gpactivatestandby
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ import time
import shutil
import tempfile
from datetime import datetime, timedelta

import psycopg2
# import GPDB modules
try:
import pg as pygresql
from gppylib.commands import unix, gp, pg
from gppylib.db import dbconn
from gppylib.gpparseopts import OptParser, OptChecker, OptionGroup, SUPPRESS_HELP
Expand Down Expand Up @@ -332,7 +331,7 @@ def promote_standby(coordinator_data_dir):
logger.info('Standby coordinator is promoted')
conn.close()
return True
except pygresql.InternalError as e:
except (psycopg2.InternalError, psycopg2.OperationalError) as e:
pass
time.sleep(1)

Expand Down
Loading

0 comments on commit 5e25156

Please sign in to comment.