diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 06b8391..5056304 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -39,10 +39,8 @@ jobs:
           SAAS_ACCOUNT_ID: ${{ secrets.INTEGRATION_TEAM_SAAS_STAGING_ACCOUNT_ID }}
           SAAS_PAT: ${{ secrets.INTEGRATION_TEAM_SAAS_STAGING_PAT }}
           PYTEST_ADDOPTS: '-o log_cli=true -o log_cli_level=INFO ${{ steps.pytest-markers.outputs.slow-tests }}'
-        run: |
-          echo "PYTEST_ADDOPTS = $PYTEST_ADDOPTS"
-          export PROJECT_SHORT_TAG=$(poetry run -- nox -s project:get-short-tag)
-          poetry run -- nox -s test:coverage --
+          PROJECT_SHORT_TAG: SAPIPY
+        run: poetry run -- nox -s test:coverage --
 
       - name: Upload Artifacts
         uses: actions/upload-artifact@v4.6.2
diff --git a/doc/changes/changelog.md b/doc/changes/changelog.md
index 0355385..a5a21e1 100644
--- a/doc/changes/changelog.md
+++ b/doc/changes/changelog.md
@@ -1,6 +1,7 @@
 # Changes
 
 * [unreleased](unreleased.md)
+* [2.6.0](changes_2.6.0.md)
 * [2.5.0](changes_2.5.0.md)
 * [2.4.0](changes_2.4.0.md)
 * [2.3.0](changes_2.3.0.md)
@@ -27,6 +28,7 @@ hidden:
 ---
 
 unreleased
+changes_2.6.0
 changes_2.5.0
 changes_2.4.0
 changes_2.3.0
diff --git a/doc/changes/changes_2.6.0.md b/doc/changes/changes_2.6.0.md
new file mode 100644
index 0000000..dbc5013
--- /dev/null
+++ b/doc/changes/changes_2.6.0.md
@@ -0,0 +1,11 @@
+# 2.6.0 - 2025-12-11
+
+## Summary
+
+This release changes the strategy for deleting a SaaS database instance to match the changed behavior of the SaaS backend.
+
+Previously, SAPIPY only waited for a fixed time before deleting. Now, when the backend responds with HTTP status code 400 and a message matching `Operation.*not allowed.*cluster.*not.*in.*proper state`, SAPIPY retries deleting the SaaS instance with exponential backoff, by default for up to 30 minutes.
+
+## Bugfixes
+
+* #125: Added retry when deleting a SaaS database instance
diff --git a/exasol/saas/client/api_access.py b/exasol/saas/client/api_access.py
index 2319c43..e8eaafb 100644
--- a/exasol/saas/client/api_access.py
+++ b/exasol/saas/client/api_access.py
@@ -3,21 +3,28 @@
 import datetime as dt
 import getpass
 import logging
+import re
 import time
 from collections.abc import Iterable
 from contextlib import contextmanager
 from datetime import (
     datetime,
     timedelta,
+    timezone,
 )
 from typing import Any
 
+import tenacity
 from tenacity import (
     TryAgain,
     retry,
 )
+from tenacity.retry import retry_if_exception
 from tenacity.stop import stop_after_delay
-from tenacity.wait import wait_fixed
+from tenacity.wait import (
+    wait_exponential,
+    wait_fixed,
+)
 
 from exasol.saas.client import (
     Limits,
@@ -38,6 +45,8 @@
     delete_allowed_ip,
     list_allowed_i_ps,
 )
+from exasol.saas.client.openapi.errors import UnexpectedStatus
+from exasol.saas.client.openapi.models.exasol_database import ExasolDatabase
 from exasol.saas.client.openapi.models.status import Status
 from exasol.saas.client.openapi.types import UNSET
 
@@ -45,24 +54,30 @@
 LOG.setLevel(logging.INFO)
 
 
+def interval_retry(interval: timedelta, timeout: timedelta):
+    return tenacity.retry(wait=wait_fixed(interval), stop=stop_after_delay(timeout))
+
+
 def timestamp_name(project_short_tag: str | None = None) -> str:
     """
     project_short_tag: Abbreviation of your project
     """
-    timestamp = f"{datetime.now().timestamp():.0f}"
+    timestamp = f"{datetime.now(timezone.utc).timestamp():.0f}"
     owner = getpass.getuser()
     candidate = f"{timestamp}{project_short_tag or ''}-{owner}"
     return candidate[: Limits.MAX_DATABASE_NAME_LENGTH]
 
 
-def wait_for_delete_clearance(start: dt.datetime):
-    lifetime = datetime.now() - start
-    if lifetime < Limits.MIN_DATABASE_LIFETIME:
-        wait = Limits.MIN_DATABASE_LIFETIME - lifetime
-        LOG.info(
-            f"Waiting {int(wait.seconds)} seconds" " before deleting the database."
-        )
-        time.sleep(wait.seconds)
+def indicates_retry(ex: BaseException) -> bool:
+    """
+    When deleting a SaaS instance raises an UnexpectedStatus, this function
+    decides whether the deletion of the database instance should be retried.
+    """
+    return bool(
+        isinstance(ex, UnexpectedStatus)
+        and ex.status_code == 400
+        and "cluster is not in a proper state" in ex.content.decode("utf-8")
+    )
 
 
 class DatabaseStartupFailure(Exception):
@@ -202,7 +217,7 @@ def create_database(
         cluster_size: str = "XS",
         region: str = "eu-central-1",
         idle_time: timedelta | None = None,
-    ) -> openapi.models.exasol_database.ExasolDatabase | None:
+    ) -> ExasolDatabase | None:
 
         def minutes(x: timedelta) -> int:
             return x.seconds // 60
@@ -215,7 +230,7 @@ def minutes(x: timedelta) -> int:
                 idle_time=minutes(idle_time),
             ),
         )
-        LOG.info(f"Creating database {name}")
+        LOG.info("Creating database %s", name)
         return create_database.sync(
             self._account_id,
             client=self._client,
@@ -241,7 +256,7 @@ def wait_until_deleted(
         timeout: timedelta = timedelta(seconds=1),
         interval: timedelta = timedelta(minutes=1),
     ):
-        @retry(wait=wait_fixed(interval), stop=stop_after_delay(timeout))
+        @interval_retry(interval, timeout)
         def still_exists() -> bool:
             result = database_id in self.list_database_ids()
             if result:
@@ -251,12 +266,45 @@ def still_exists() -> bool:
         if still_exists():
             raise DatabaseDeleteTimeout
 
-    def delete_database(self, database_id: str, ignore_failures=False):
-        with self._ignore_failures(ignore_failures) as client:
-            return delete_database.sync_detailed(
-                self._account_id, database_id, client=client
+    def delete_database(
+        self,
+        database_id: str,
+        ignore_failures: bool = False,
+        timeout: timedelta = timedelta(minutes=30),
+        min_interval: timedelta = timedelta(seconds=1),
+        max_interval: timedelta = timedelta(minutes=2),
+    ) -> None:
+        @retry(
+            reraise=True,
+            wait=wait_exponential(
+                multiplier=1,
+                min=min_interval,
+                max=max_interval,
+            ),
+            stop=stop_after_delay(timeout),
+            retry=retry_if_exception(indicates_retry),
+        )
+        def delete_with_retry():
+            LOG.info("Trying to delete database with ID %s ...", database_id)
+            delete_database.sync_detailed(
+                self._account_id,
+                database_id,
+                client=self._client,
             )
+        try:
+            delete_with_retry()
+            LOG.info("Deleted database with ID %s", database_id)
+        except Exception as ex:
+            if ignore_failures:
+                LOG.error(
+                    "Ignoring failure when deleting database with ID %s: %s",
+                    database_id,
+                    ex,
+                )
+            else:
+                raise
+
 
     def list_database_ids(self) -> Iterable[str]:
         dbs = list_databases.sync(self._account_id, client=self._client) or []
         return (db.id for db in dbs)
@@ -270,29 +318,21 @@ def database(
         idle_time: timedelta | None = None,
     ):
         db = None
-        start = datetime.now()
         try:
             db = self.create_database(name, idle_time=idle_time)
             yield db
-            wait_for_delete_clearance(start)
         finally:
             db_repr = f"{db.name} with ID {db.id}" if db else None
-            if db and not keep:
-                LOG.info(f"Deleting database {db_repr}")
-                response = self.delete_database(db.id, ignore_delete_failure)
-                if response.status_code == 200:
-                    LOG.info(f"Successfully deleted database {db_repr}.")
-                else:
-                    LOG.warning(f"Ignoring status code {response.status_code}.")
-            elif not db:
-                LOG.warning("Cannot delete db None")
+            if not db:
+                LOG.warning("Cannot delete database None")
+            elif keep:
+                LOG.info("Keeping database %s as keep = %s.", db_repr, keep)
             else:
-                LOG.info(f"Keeping database {db_repr} as keep = {keep}")
+                LOG.info("Deleting database %s", db_repr)
+                self.delete_database(db.id, ignore_delete_failure)
+                LOG.info("Successfully deleted database %s.", db_repr)
 
-    def get_database(
-        self,
-        database_id: str,
-    ) -> openapi.models.exasol_database.ExasolDatabase | None:
+    def get_database(self, database_id: str) -> ExasolDatabase | None:
         return get_database.sync(
             self._account_id,
             database_id,
@@ -305,17 +345,16 @@ def wait_until_running(
         timeout: timedelta = timedelta(minutes=30),
         interval: timedelta = timedelta(minutes=2),
     ):
-        success = [
-            Status.RUNNING,
-        ]
+        success = [Status.RUNNING]
 
-        @retry(wait=wait_fixed(interval), stop=stop_after_delay(timeout))
-        def poll_status():
+        @interval_retry(interval, timeout)
+        def poll_status() -> Status:
             db = self.get_database(database_id)
-            if db.status not in success:
-                LOG.info("- Database status: %s ...", db.status)
+            status = db.status if db else None
+            if status not in success:
+                LOG.info("- Database status: %s ...", status)
                 raise TryAgain
-            return db.status
+            return status
 
         if poll_status() not in success:
             raise DatabaseStartupFailure()
diff --git a/noxfile.py b/noxfile.py
index ddb18ce..96cb616 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -127,18 +127,3 @@ def check_api_outdated(session: Session):
     """
     generate_api(session)
     session.run("git", "diff", "--exit-code", DEST_DIR)
-
-
-@nox.session(name="project:get-short-tag", python=False)
-def get_project_short_tag(session: Session):
-    config_file = Path("error_code_config.yml")
-    content = config_file.read_text()
-    header = False
-    for line in content.splitlines():
-        line = line.strip()
-        if header:
-            print(line.strip().replace(":", ""))
-            return
-        if line.startswith("error-tags:"):
-            header = True
-    raise RuntimeError(f"Could not read project short tag from file {config_file}")
diff --git a/pyproject.toml b/pyproject.toml
index d51b347..ca36aac 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "exasol-saas-api"
-version = "2.5.0"
+version = "2.6.0"
 requires-python = ">=3.10.0,<4.0"
 description = "API enabling Python applications connecting to Exasol database SaaS instances and using their SaaS services"
 authors = [
diff --git a/test/.placeholder b/test/__init__.py
similarity index 100%
rename from test/.placeholder
rename to test/__init__.py
diff --git a/test/integration/allowed_ip_test.py b/test/integration/allowed_ip_test.py
index 1aa5472..8b92637 100644
--- a/test/integration/allowed_ip_test.py
+++ b/test/integration/allowed_ip_test.py
@@ -1,3 +1,7 @@
+import pytest
+
+
+@pytest.mark.slow
 def test_lifecycle(api_access):
     testee = api_access
     with testee.allowed_ip(ignore_delete_failure=True) as ip:
diff --git a/test/integration/conftest.py b/test/integration/conftest.py
index b8b2228..2fc825e 100644
--- a/test/integration/conftest.py
+++ b/test/integration/conftest.py
@@ -59,12 +59,12 @@ def operational_saas_database_id(api_access, database_name) -> str:
 
 
 @pytest.fixture(scope="session")
-def project_short_tag():
-    return os.environ.get("PROJECT_SHORT_TAG")
+def project_short_tag() -> str:
+    return "SAPIPY"
 
 
 @pytest.fixture(scope="session")
-def database_name(project_short_tag):
+def database_name(project_short_tag) -> str:
     return timestamp_name(project_short_tag)
 
 
diff --git a/test/integration/connection_test.py b/test/integration/connection_test.py
index 0f6695f..4cbcb53 100644
--- a/test/integration/connection_test.py
+++ b/test/integration/connection_test.py
@@ -1,47 +1,57 @@
+import contextlib
+import ssl
+
 import pyexasol
 import pytest
 
 from exasol.saas.client.api_access import get_connection_params
 
+SSL_OPTIONS = {"websocket_sslopt": {"cert_reqs": ssl.CERT_NONE}}
+
+
+@pytest.fixture
+def pyexasol_connection(
+    saas_host,
+    saas_pat,
+    saas_account_id,
+    allow_connection,
+    operational_saas_database_id,
+):
+    @contextlib.contextmanager
+    def connect(**kwargs):
+        params = get_connection_params(
+            host=saas_host,
+            account_id=saas_account_id,
+            pat=saas_pat,
+            **kwargs,
+        )
+        params.update(SSL_OPTIONS)
+        with pyexasol.connect(**params) as con:
+            yield con
+
+    return connect
+
 
 @pytest.mark.slow
 def test_get_connection_params_with_id(
-    saas_host, saas_pat, saas_account_id, operational_saas_database_id, allow_connection
+    pyexasol_connection,
+    operational_saas_database_id,
 ):
     """
     This integration test checks that opening a pyexasol connection to a
     SaaS DB with known id and executing a query works.
     """
-    connection_params = get_connection_params(
-        host=saas_host,
-        account_id=saas_account_id,
-        pat=saas_pat,
-        database_id=operational_saas_database_id,
-    )
-    with pyexasol.connect(**connection_params) as pyconn:
+    with pyexasol_connection(database_id=operational_saas_database_id) as pyconn:
        result = pyconn.execute("SELECT 1;").fetchall()
        assert result == [(1,)]
 
 
 @pytest.mark.slow
-def test_get_connection_params_with_name(
-    saas_host,
-    saas_pat,
-    saas_account_id,
-    operational_saas_database_id,
-    database_name,
-    allow_connection,
-):
+def test_get_connection_params_with_name(pyexasol_connection, database_name):
     """
     This integration test checks that opening a pyexasol connection to a
     SaaS DB with known name and executing a query works.
     """
-    connection_params = get_connection_params(
-        host=saas_host,
-        account_id=saas_account_id,
-        pat=saas_pat,
-        database_name=database_name,
-    )
-    with pyexasol.connect(**connection_params) as pyconn:
+    with pyexasol_connection(database_name=database_name) as pyconn:
        result = pyconn.execute("SELECT 1;").fetchall()
        assert result == [(1,)]
diff --git a/test/integration/databases_test.py b/test/integration/databases_test.py
index b6399d6..ec1383f 100644
--- a/test/integration/databases_test.py
+++ b/test/integration/databases_test.py
@@ -9,11 +9,22 @@
 from exasol.saas.client import PROMISING_STATES
 from exasol.saas.client.api_access import (
     get_connection_params,
-    wait_for_delete_clearance,
+    timestamp_name,
 )
+from exasol.saas.client.openapi.models.exasol_database import ExasolDatabase
 
 
-def test_lifecycle(api_access, database_name):
+@pytest.fixture
+def local_name(project_short_tag: str | None) -> str:
+    """
+    Unlike the global fixture database_name, this fixture uses scope
+    "function" to generate an individual name for each test case in this file.
+    """
+    return timestamp_name(project_short_tag)
+
+
+@pytest.mark.slow
+def test_lifecycle(api_access, local_name):
     """
     This integration test uses the database created and provided
     by pytest context ``_OpenApiAccess.database()`` to verify
@@ -24,34 +35,32 @@
     - list_databases does not include the deleted database anymore
     """
 
-    testee = api_access
-    with testee.database(database_name, ignore_delete_failure=True) as db:
+    def wait_until_running_too_short(db: ExasolDatabase):
+        api_access.wait_until_running(
+            db.id,
+            timeout=timedelta(seconds=3),
+            interval=timedelta(seconds=1),
+        )
+
+    def get_connection(db: ExasolDatabase):
+        clusters = api_access.clusters(db.id)
+        return api_access.get_connection(db.id, clusters[0].id)
+
+    with api_access.database(local_name, ignore_delete_failure=True) as db:
         start = datetime.now()
         # verify state and clusters of created database
         assert db.status in PROMISING_STATES and db.clusters.total == 1
 
-        # verify database is listed
-        assert db.id in testee.list_database_ids()
-
-        # delete database and verify database is not listed anymore
-        wait_for_delete_clearance(start)
-        testee.delete_database(db.id)
-        testee.wait_until_deleted(db.id)
-        assert db.id not in testee.list_database_ids()
-
-
-def test_poll(api_access, database_name):
-    with api_access.database(database_name) as db:
         with pytest.raises(RetryError):
-            api_access.wait_until_running(
-                db.id,
-                timeout=timedelta(seconds=3),
-                interval=timedelta(seconds=1),
-            )
+            wait_until_running_too_short(db)
+
+        # verify database is listed
+        assert db.id in api_access.list_database_ids()
 
-
-def test_get_connection(api_access, database_name):
-    with api_access.database(database_name) as db:
-        clusters = api_access.clusters(db.id)
-        connection = api_access.get_connection(db.id, clusters[0].id)
-        assert connection.db_username is not None and connection.port == 8563
+        con = get_connection(db)
+        assert con.db_username is not None and con.port == 8563
+
+        # delete database and verify database is not listed anymore
+        api_access.delete_database(db.id)
+        api_access.wait_until_deleted(db.id)
+        assert db.id not in api_access.list_database_ids()
diff --git a/test/integration/test_placeholder.py b/test/integration/test_placeholder.py
new file mode 100644
index 0000000..1d09f30
--- /dev/null
+++ b/test/integration/test_placeholder.py
@@ -0,0 +1,4 @@
+def test_placeholder():
+    """
+    Prevent a failure when calling pytest with "-m not slow" on test/integration.
+    """
diff --git a/test/unit/test_api_access.py b/test/unit/test_api_access.py
new file mode 100644
index 0000000..f936705
--- /dev/null
+++ b/test/unit/test_api_access.py
@@ -0,0 +1,123 @@
+from datetime import timedelta
+from test.util import not_raises
+from unittest.mock import Mock
+
+import pytest
+
+from exasol.saas.client.api_access import (
+    OpenApiAccess,
+    indicates_retry,
+)
+from exasol.saas.client.openapi.errors import UnexpectedStatus
+
+RETRY_EXCEPTION = UnexpectedStatus(
+    400, b"Operation is not allowed:The cluster is not in a proper state!"
+)
+
+
+@pytest.mark.parametrize(
+    "exception, expected",
+    [
+        pytest.param(RuntimeError("bla"), False, id="other_exception"),
+        pytest.param(UnexpectedStatus(404, b"bla"), False, id="other_status_code"),
+        pytest.param(UnexpectedStatus(400, b"bla"), False, id="other_message"),
+        pytest.param(RETRY_EXCEPTION, True, id="indicates_retry"),
+    ],
+)
+def test_indicates_retry(exception, expected):
+    """
+    Call function api_access.indicates_retry() with different exceptions
+    in order to verify that it correctly classifies each exception as
+    indicating whether deleting a SaaS database instance should be retried.
+    """
+    assert indicates_retry(exception) == expected
+
+
+class ApiRunner:
+    def __init__(self, mocker):
+        self.api = OpenApiAccess(Mock(), account_id="A1")
+        self._mocker = mocker
+        self.mock = None
+
+    def mock_delete(self, side_effect):
+        self.mock = Mock(side_effect=side_effect)
+        self._mocker.patch(
+            "exasol.saas.client.api_access." "delete_database.sync_detailed", self.mock
+        )
+
+
+@pytest.fixture
+def api_runner(mocker) -> ApiRunner:
+    return ApiRunner(mocker)
+
+
+@pytest.fixture
+def retry_timings() -> dict[str, timedelta]:
+    """
+    Common timings, used by some of the test cases in this file.
+    """
+    interval = timedelta(seconds=0.2)
+    return {
+        "min_interval": interval,
+        "max_interval": interval,
+        "timeout": timedelta(seconds=0.5),
+    }
+
+
+@pytest.mark.parametrize(
+    "side_effect",
+    [
+        pytest.param([UnexpectedStatus(400, b"bla")], id="immediate_failure"),
+        pytest.param(
+            [RETRY_EXCEPTION, RETRY_EXCEPTION, UnexpectedStatus(400, b"bla")],
+            id="failure_after_retry",
+        ),
+        pytest.param(
+            [RETRY_EXCEPTION for _ in range(4)],
+            id="timeout_after_too_many_retries",
+        ),
+    ],
+)
+def test_delete_fail(side_effect, api_runner, retry_timings) -> None:
+    api_runner.mock_delete(side_effect)
+    with pytest.raises(UnexpectedStatus):
+        api_runner.api.delete_database("123", **retry_timings)
+
+
+@pytest.mark.parametrize(
+    "side_effect, ignore_failures, expected_log_message",
+    [
+        pytest.param(
+            [RETRY_EXCEPTION, None],
+            False,
+            "",
+            id="success_after_retry",
+        ),
+        pytest.param(
+            [UnexpectedStatus(400, b"bla")],
+            True,
+            (
+                "Ignoring failure when deleting database with"
+                " ID 123: Unexpected status code: 400"
+            ),
+            id="success_by_ignoring_failures",
+        ),
+    ],
+)
+def test_delete_success(
+    side_effect,
+    ignore_failures,
+    expected_log_message,
+    api_runner,
+    retry_timings,
+    caplog,
+) -> None:
+    api_runner.mock_delete(side_effect)
+    with not_raises(Exception):
+        api_runner.api.delete_database(
+            database_id="123",
+            **retry_timings,
+            ignore_failures=ignore_failures,
+        )
+    assert api_runner.mock.called
+    assert expected_log_message in caplog.text
diff --git a/test/unit/test_placeholder.py b/test/unit/test_placeholder.py
deleted file mode 100644
index 9fb9833..0000000
--- a/test/unit/test_placeholder.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""unit tests"""
-
-
-def test_placeholder():
-    """doc"""
diff --git a/test/util.py b/test/util.py
new file mode 100644
index 0000000..60b0003
--- /dev/null
+++ b/test/util.py
@@ -0,0 +1,11 @@
+import contextlib
+
+import pytest
+
+
+@contextlib.contextmanager
+def not_raises(exception):
+    try:
+        yield
+    except exception:
+        raise pytest.fail(f"Did raise {exception}")
diff --git a/version.py b/version.py
index 05ae1c5..d182308 100644
--- a/version.py
+++ b/version.py
@@ -9,7 +9,7 @@
 """
 
 MAJOR = 2
-MINOR = 5
+MINOR = 6
 PATCH = 0
 VERSION = f"{MAJOR}.{MINOR}.{PATCH}"
 __version__ = VERSION
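For orientation, below is a minimal usage sketch (not part of the patch) of the retry-on-delete behavior introduced in `exasol/saas/client/api_access.py`. It relies only on names visible in the diff above; the wrapper function `remove_database` and the way the authenticated OpenAPI `client` is obtained are assumptions for illustration.

```python
from datetime import timedelta

from exasol.saas.client.api_access import OpenApiAccess


def remove_database(client, account_id: str, database_id: str) -> None:
    # `client` is assumed to be an already authenticated OpenAPI client.
    access = OpenApiAccess(client, account_id=account_id)

    # With this patch, delete_database() retries on HTTP 400 responses whose
    # body contains "cluster is not in a proper state", using exponential
    # backoff between min_interval and max_interval until timeout expires.
    access.delete_database(
        database_id=database_id,
        timeout=timedelta(minutes=30),      # give up after 30 minutes (default)
        min_interval=timedelta(seconds=1),  # initial backoff delay (default)
        max_interval=timedelta(minutes=2),  # backoff cap (default)
        ignore_failures=False,              # re-raise if deletion ultimately fails
    )
```

Passing `ignore_failures=True` instead logs the final exception and returns, matching the behavior exercised by `test_delete_success` in `test/unit/test_api_access.py`.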