Skip to content

Commit 2e559a8

Browse files
committed
moved pg to gymlib
1 parent cb1ca2a commit 2e559a8

File tree

5 files changed

+122
-105
lines changed

5 files changed

+122
-105
lines changed

dbms/postgres/cli.py

Lines changed: 2 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@
99
from typing import Any, Optional
1010

1111
import click
12-
import pglast
13-
import psutil
14-
import psycopg
1512
import sqlalchemy
13+
from gymlib.pg import create_sqlalchemy_conn, sql_file_execute
1614
from gymlib.shell import subprocess_run
1715
from gymlib.symlinks_paths import (
1816
get_dbdata_tgz_symlink_path,
@@ -28,7 +26,7 @@
2826
is_fully_resolved,
2927
is_ssd,
3028
)
31-
from sqlalchemy import create_engine, text
29+
from sqlalchemy import text
3230

3331
from benchmark.constants import DEFAULT_SCALE_FACTOR
3432
from benchmark.job.load_info import JobLoadInfo
@@ -365,91 +363,3 @@ def sqlalchemy_conn_execute(
365363
conn: sqlalchemy.Connection, sql: str
366364
) -> sqlalchemy.engine.CursorResult[Any]:
367365
return conn.execute(text(sql))
368-
369-
370-
def sql_file_queries(dbgym_workspace: DBGymWorkspace, filepath: Path) -> list[str]:
371-
with dbgym_workspace.open_and_save(filepath) as f:
372-
lines: list[str] = []
373-
for line in f:
374-
if line.startswith("--"):
375-
continue
376-
if len(line.strip()) == 0:
377-
continue
378-
lines.append(line)
379-
queries_str = "".join(lines)
380-
queries: list[str] = pglast.split(queries_str)
381-
return queries
382-
383-
384-
def sql_file_execute(
385-
dbgym_workspace: DBGymWorkspace, conn: sqlalchemy.Connection, filepath: Path
386-
) -> None:
387-
for sql in sql_file_queries(dbgym_workspace, filepath):
388-
sqlalchemy_conn_execute(conn, sql)
389-
390-
391-
# The reason pgport is an argument is because when doing agent HPO, we want to run multiple instances of Postgres
392-
# at the same time. In this situation, they need to have different ports
393-
def get_connstr(pgport: int = DEFAULT_POSTGRES_PORT, use_psycopg: bool = True) -> str:
394-
connstr_suffix = f"{DBGYM_POSTGRES_USER}:{DBGYM_POSTGRES_PASS}@localhost:{pgport}/{DBGYM_POSTGRES_DBNAME}"
395-
# use_psycopg means whether or not we use the psycopg.connect() function
396-
# counterintuively, you *don't* need psycopg in the connection string if you *are*
397-
# using the psycopg.connect() function
398-
connstr_prefix = "postgresql" if use_psycopg else "postgresql+psycopg"
399-
return connstr_prefix + "://" + connstr_suffix
400-
401-
402-
def get_kv_connstr(pgport: int = DEFAULT_POSTGRES_PORT) -> str:
403-
return f"host=localhost port={pgport} user={DBGYM_POSTGRES_USER} password={DBGYM_POSTGRES_PASS} dbname={DBGYM_POSTGRES_DBNAME}"
404-
405-
406-
def create_psycopg_conn(pgport: int = DEFAULT_POSTGRES_PORT) -> psycopg.Connection[Any]:
407-
connstr = get_connstr(use_psycopg=True, pgport=pgport)
408-
psycopg_conn = psycopg.connect(connstr, autocommit=True, prepare_threshold=None)
409-
return psycopg_conn
410-
411-
412-
def create_sqlalchemy_conn(
413-
pgport: int = DEFAULT_POSTGRES_PORT,
414-
) -> sqlalchemy.Connection:
415-
connstr = get_connstr(use_psycopg=False, pgport=pgport)
416-
engine: sqlalchemy.Engine = create_engine(
417-
connstr,
418-
execution_options={"isolation_level": "AUTOCOMMIT"},
419-
)
420-
return engine.connect()
421-
422-
423-
def get_is_postgres_running() -> bool:
424-
"""
425-
This is often used in assertions to ensure that Postgres isn't running before we
426-
execute some code.
427-
428-
I intentionally do not have a function that forcefully *stops* all Postgres instances.
429-
This is risky because it could accidentally stop instances it wasn't supposed (e.g.
430-
Postgres instances run by other users on the same machine).
431-
432-
Stopping Postgres instances is thus a responsibility of the human to take care of.
433-
"""
434-
return len(get_running_postgres_ports()) > 0
435-
436-
437-
def get_running_postgres_ports() -> list[int]:
438-
"""
439-
Returns a list of all ports on which Postgres is currently running.
440-
441-
There are ways to check with psycopg/sqlalchemy. However, I chose to check using
442-
psutil to keep it as simple as possible and orthogonal to how connections work.
443-
"""
444-
running_ports = []
445-
446-
for conn in psutil.net_connections(kind="inet"):
447-
if conn.status == "LISTEN":
448-
try:
449-
proc = psutil.Process(conn.pid)
450-
if proc.name() == "postgres":
451-
running_ports.append(conn.laddr.port)
452-
except (psutil.NoSuchProcess, psutil.AccessDenied):
453-
continue
454-
455-
return running_ports

gymlib_package/gymlib/pg.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
"""
2+
There are multiple parts of the codebase which interact with Postgres. This file contains helpers common to all those parts.
3+
"""
4+
5+
from pathlib import Path
6+
from typing import Any
7+
8+
import pglast
9+
import psutil
10+
import psycopg
11+
import sqlalchemy
12+
from gymlib.workspace import DBGymWorkspace
13+
from sqlalchemy import create_engine, text
14+
15+
DBGYM_POSTGRES_USER = "dbgym_user"
16+
DBGYM_POSTGRES_PASS = "dbgym_pass"
17+
DBGYM_POSTGRES_DBNAME = "dbgym"
18+
DEFAULT_POSTGRES_DBNAME = "postgres"
19+
DEFAULT_POSTGRES_PORT = 5432
20+
SHARED_PRELOAD_LIBRARIES = "boot,pg_hint_plan,pg_prewarm"
21+
22+
23+
def sqlalchemy_conn_execute(
    conn: sqlalchemy.Connection, sql: str
) -> sqlalchemy.engine.CursorResult[Any]:
    """Run a raw SQL string on `conn` and return the resulting cursor.

    The string is wrapped in `sqlalchemy.text()` before it is handed to
    `conn.execute()`.
    """
    statement = text(sql)
    return conn.execute(statement)
27+
28+
29+
def sql_file_queries(dbgym_workspace: DBGymWorkspace, filepath: Path) -> list[str]:
    """Read the SQL file at `filepath` and return its individual statements.

    The file is opened through `dbgym_workspace.open_and_save()`. Lines that
    begin with "--" and lines containing only whitespace are dropped; the
    remaining lines are concatenated and split into statements with
    `pglast.split()`.
    """
    with dbgym_workspace.open_and_save(filepath) as sql_file:
        kept_lines = [
            raw_line
            for raw_line in sql_file
            if not raw_line.startswith("--") and raw_line.strip()
        ]
    statements: list[str] = pglast.split("".join(kept_lines))
    return statements
41+
42+
43+
def sql_file_execute(
    dbgym_workspace: DBGymWorkspace, conn: sqlalchemy.Connection, filepath: Path
) -> None:
    """Execute each statement of the SQL file at `filepath` on `conn`, in file order."""
    statements = sql_file_queries(dbgym_workspace, filepath)
    for statement in statements:
        sqlalchemy_conn_execute(conn, statement)
48+
49+
50+
# The reason pgport is an argument is because when doing agent HPO, we want to run multiple instances of Postgres
51+
# at the same time. In this situation, they need to have different ports
52+
def get_connstr(pgport: int = DEFAULT_POSTGRES_PORT, use_psycopg: bool = True) -> str:
    """Build the URL-style connection string for the dbgym database on localhost.

    Args:
        pgport: Port of the Postgres instance to address.
        use_psycopg: Whether the string will be passed to `psycopg.connect()`.
            Counterintuitively, the "+psycopg" driver tag is *omitted* in that
            case and only added when this is False.

    Returns:
        A string of the form "<scheme>://<user>:<pass>@localhost:<pgport>/<dbname>",
        where <scheme> is "postgresql" or "postgresql+psycopg".
    """
    if use_psycopg:
        scheme = "postgresql"
    else:
        scheme = "postgresql+psycopg"
    location = f"{DBGYM_POSTGRES_USER}:{DBGYM_POSTGRES_PASS}@localhost:{pgport}/{DBGYM_POSTGRES_DBNAME}"
    return scheme + "://" + location
59+
60+
61+
def get_kv_connstr(pgport: int = DEFAULT_POSTGRES_PORT) -> str:
    """Return the space-separated "key=value" connection string for the dbgym database on localhost at `pgport`."""
    kv_connstr = f"host=localhost port={pgport} user={DBGYM_POSTGRES_USER} password={DBGYM_POSTGRES_PASS} dbname={DBGYM_POSTGRES_DBNAME}"
    return kv_connstr
63+
64+
65+
def create_psycopg_conn(pgport: int = DEFAULT_POSTGRES_PORT) -> psycopg.Connection[Any]:
    """Open a psycopg connection to the dbgym database on `pgport`.

    The connection is created with `autocommit=True` and
    `prepare_threshold=None`.
    """
    return psycopg.connect(
        get_connstr(use_psycopg=True, pgport=pgport),
        autocommit=True,
        prepare_threshold=None,
    )
69+
70+
71+
def create_sqlalchemy_conn(
    pgport: int = DEFAULT_POSTGRES_PORT,
) -> sqlalchemy.Connection:
    """Open a SQLAlchemy connection to the dbgym database on `pgport`.

    A new engine is created on every call, configured with the "AUTOCOMMIT"
    isolation level, and a connection from that engine is returned.
    """
    engine_options = {"isolation_level": "AUTOCOMMIT"}
    sqlalchemy_url = get_connstr(use_psycopg=False, pgport=pgport)
    engine: sqlalchemy.Engine = create_engine(
        sqlalchemy_url, execution_options=engine_options
    )
    return engine.connect()
80+
81+
82+
def get_is_postgres_running() -> bool:
    """
    Report whether any Postgres instance is currently listening on a port.

    This is often used in assertions to ensure that Postgres isn't running before
    some code is executed.

    There is intentionally no companion function that forcefully *stops* all
    Postgres instances. That would be risky because it could accidentally stop
    instances it wasn't supposed to (e.g. Postgres instances run by other users on
    the same machine). Stopping Postgres instances is thus the responsibility of
    the human.
    """
    active_ports = get_running_postgres_ports()
    return bool(active_ports)
94+
95+
96+
def get_running_postgres_ports() -> list[int]:
    """
    Returns the ports on which a process named "postgres" is currently listening.

    Each port is reported once, in the order it is first seen, even when Postgres
    listens on it through several sockets (e.g. one per address family).

    There are ways to check with psycopg/sqlalchemy. However, this checks using
    psutil to keep it as simple as possible and orthogonal to how connections work.
    """
    running_ports: list[int] = []

    for conn in psutil.net_connections(kind="inet"):
        if conn.status != "LISTEN":
            continue

        # psutil reports pid=None when it cannot determine the owning process.
        # psutil.Process(None) would silently inspect *this* process instead,
        # so skip such sockets explicitly.
        if conn.pid is None:
            continue

        try:
            is_postgres = psutil.Process(conn.pid).name() == "postgres"
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process exited or is not inspectable by us; best-effort skip.
            continue

        port = conn.laddr.port
        if is_postgres and port not in running_ports:
            running_ports.append(port)

    return running_ports

gymlib_package/gymlib/pg_conn.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,11 @@
1919
import psutil
2020
import psycopg
2121
import yaml
22+
from gymlib.pg import DBGYM_POSTGRES_DBNAME, SHARED_PRELOAD_LIBRARIES, get_kv_connstr
2223
from gymlib.workspace import DBGymWorkspace, parent_path_of_path
2324
from plumbum import local
2425
from psycopg.errors import ProgramLimitExceeded, QueryCanceled
2526

26-
from dbms.postgres.cli import (
27-
DBGYM_POSTGRES_DBNAME,
28-
SHARED_PRELOAD_LIBRARIES,
29-
get_kv_connstr,
30-
)
31-
3227
CONNECT_TIMEOUT = 300
3328

3429

gymlib_package/gymlib/tests/integtest_pg_conn.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,14 @@
22
import unittest
33

44
import psycopg
5-
from gymlib.pg_conn import PostgresConn
6-
from gymlib.tests.gymlib_integtest_util import GymlibIntegtestManager
7-
from gymlib.workspace import DBGymWorkspace
8-
9-
from dbms.postgres.cli import (
5+
from gymlib.pg import (
106
DEFAULT_POSTGRES_PORT,
117
get_is_postgres_running,
128
get_running_postgres_ports,
139
)
10+
from gymlib.pg_conn import PostgresConn
11+
from gymlib.tests.gymlib_integtest_util import GymlibIntegtestManager
12+
from gymlib.workspace import DBGymWorkspace
1413

1514

1615
class PostgresConnTests(unittest.TestCase):

orchestrate/replay.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
from collections import defaultdict
22
from pathlib import Path
33

4+
from gymlib.pg import DEFAULT_POSTGRES_PORT
45
from gymlib.pg_conn import PostgresConn
56
from gymlib.tuning_artifacts import TuningArtifactsReader
67
from gymlib.workload import Workload
78
from gymlib.workspace import DBGymWorkspace
89

9-
from dbms.postgres.cli import DEFAULT_POSTGRES_PORT
10-
1110

1211
def replay(
1312
dbgym_workspace: DBGymWorkspace, tuning_artifacts_path: Path

0 commit comments

Comments
 (0)