Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,4 @@ private void aiNodeRegisterAndRemoveTest(Statement statement) throws SQLExceptio
}
Assert.fail("The target AINode is not removed successfully after all retries.");
}

// TODO: We might need to add a remove-unknown test in the future, but the current
// infrastructure makes it too hard to implement.
}
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,15 @@ private void userDefinedModelManagementTest(Statement statement)
public void dropBuiltInModelErrorTestInTree() throws SQLException {
try (Connection connection = EnvFactory.getEnv().getConnection(BaseEnv.TREE_SQL_DIALECT);
Statement statement = connection.createStatement()) {
errorTest(statement, "drop model sundial", "1510: Cannot delete built-in model: sundial");
errorTest(statement, "drop model sundial", "1506: Cannot delete built-in model: sundial");
}
}

@Test
public void dropBuiltInModelErrorTestInTable() throws SQLException {
try (Connection connection = EnvFactory.getEnv().getConnection(BaseEnv.TABLE_SQL_DIALECT);
Statement statement = connection.createStatement()) {
errorTest(statement, "drop model sundial", "1510: Cannot delete built-in model: sundial");
errorTest(statement, "drop model sundial", "1506: Cannot delete built-in model: sundial");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,16 +244,17 @@ public enum TSStatusCode {
CQ_UPDATE_LAST_EXEC_TIME_ERROR(1403),

// AI
CREATE_MODEL_ERROR(1500),
DROP_MODEL_ERROR(1501),
MODEL_EXIST_ERROR(1502),
GET_MODEL_INFO_ERROR(1503),
NO_REGISTERED_AI_NODE_ERROR(1504),
MODEL_NOT_FOUND_ERROR(1505),
REGISTER_AI_NODE_ERROR(1506),
UNAVAILABLE_AI_DEVICE_ERROR(1507),
AI_NODE_INTERNAL_ERROR(1510),
REMOVE_AI_NODE_ERROR(1511),
NO_REGISTERED_AI_NODE_ERROR(1500),
REGISTER_AI_NODE_ERROR(1501),
REMOVE_AI_NODE_ERROR(1502),
MODEL_EXISTED_ERROR(1503),
MODEL_NOT_EXIST_ERROR(1504),
CREATE_MODEL_ERROR(1505),
DROP_BUILTIN_MODEL_ERROR(1506),
DROP_MODEL_ERROR(1507),
UNAVAILABLE_AI_DEVICE_ERROR(1508),

AINODE_INTERNAL_ERROR(1599), // In case somebody is too lazy to add a new error code

// Pipe Plugin
CREATE_PIPE_PLUGIN_ERROR(1600),
Expand Down
8 changes: 4 additions & 4 deletions iotdb-core/ainode/iotdb/ainode/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
AINODE_THRIFT_COMPRESSION_ENABLED,
AINODE_VERSION_INFO,
)
from iotdb.ainode.core.exception import BadNodeUrlError
from iotdb.ainode.core.exception import BadNodeUrlException
from iotdb.ainode.core.log import Logger
from iotdb.ainode.core.util.decorator import singleton
from iotdb.thrift.common.ttypes import TEndPoint
Expand Down Expand Up @@ -437,7 +437,7 @@ def _load_config_from_file(self) -> None:
file_configs["ain_cluster_ingress_time_zone"]
)

except BadNodeUrlError:
except BadNodeUrlException:
logger.warning("Cannot load AINode conf file, use default configuration.")

except Exception as e:
Expand Down Expand Up @@ -489,12 +489,12 @@ def parse_endpoint_url(endpoint_url: str) -> TEndPoint:
"""
split = endpoint_url.split(":")
if len(split) != 2:
raise BadNodeUrlError(endpoint_url)
raise BadNodeUrlException(endpoint_url)

ip = split[0]
try:
port = int(split[1])
result = TEndPoint(ip, port)
return result
except ValueError:
raise BadNodeUrlError(endpoint_url)
raise BadNodeUrlException(endpoint_url)
33 changes: 9 additions & 24 deletions iotdb-core/ainode/iotdb/ainode/core/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,33 +81,18 @@
class TSStatusCode(Enum):
SUCCESS_STATUS = 200
REDIRECTION_RECOMMEND = 400
MODEL_EXIST_ERROR = 1502
MODEL_NOT_FOUND_ERROR = 1505
UNAVAILABLE_AI_DEVICE_ERROR = 1507
AINODE_INTERNAL_ERROR = 1510
MODEL_EXISTED_ERROR = 1503
MODEL_NOT_EXIST_ERROR = 1504
CREATE_MODEL_ERROR = 1505
DROP_BUILTIN_MODEL_ERROR = 1506
DROP_MODEL_ERROR = 1507
UNAVAILABLE_AI_DEVICE_ERROR = 1508

INVALID_URI_ERROR = 1511
INVALID_INFERENCE_CONFIG = 1512
INFERENCE_INTERNAL_ERROR = 1520

def get_status_code(self) -> int:
return self.value

AINODE_INTERNAL_ERROR = 1599 # In case somebody is too lazy to add a new error code

class HyperparameterName(Enum):
# Training hyperparameter
LEARNING_RATE = "learning_rate"
EPOCHS = "epochs"
BATCH_SIZE = "batch_size"
USE_GPU = "use_gpu"
NUM_WORKERS = "num_workers"

# Structure hyperparameter
KERNEL_SIZE = "kernel_size"
INPUT_VARS = "input_vars"
BLOCK_TYPE = "block_type"
D_MODEL = "d_model"
INNER_LAYERS = "inner_layer"
OUTER_LAYERS = "outer_layer"

def name(self):
def get_status_code(self) -> int:
return self.value
108 changes: 40 additions & 68 deletions iotdb-core/ainode/iotdb/ainode/core/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
)


class _BaseError(Exception):
class _BaseException(Exception):
"""Base class for exceptions in this module."""

def __init__(self):
Expand All @@ -33,122 +33,94 @@ def __str__(self) -> str:
return self.message


class BadNodeUrlError(_BaseError):
class BadNodeUrlException(_BaseException):
def __init__(self, node_url: str):
super().__init__()
self.message = "Bad node url: {}".format(node_url)


class ModelNotExistError(_BaseError):
def __init__(self, msg: str):
self.message = "Model is not exists: {} ".format(msg)


class BadConfigValueError(_BaseError):
def __init__(self, config_name: str, config_value, hint: str = ""):
self.message = "Bad value [{0}] for config {1}. {2}".format(
config_value, config_name, hint
)

# ==================== Model Management ====================

class MissingConfigError(_BaseError):
def __init__(self, config_name: str):
self.message = "Missing config: {}".format(config_name)


class MissingOptionError(_BaseError):
def __init__(self, config_name: str):
self.message = "Missing task option: {}".format(config_name)
class ModelExistedException(_BaseException):
def __init__(self, model_id: str):
super().__init__()
self.message = "Model {} already exists".format(model_id)


class RedundantOptionError(_BaseError):
def __init__(self, option_name: str):
self.message = "Redundant task option: {}".format(option_name)
class ModelNotExistException(_BaseException):
def __init__(self, model_id: str):
super().__init__()
self.message = "Model {} does not exist".format(model_id)


class WrongTypeConfigError(_BaseError):
def __init__(self, config_name: str, expected_type: str):
self.message = "Wrong type for config: {0}, expected: {1}".format(
config_name, expected_type
class InvalidModelUriException(_BaseException):
def __init__(self, msg: str):
super().__init__()
self.message = (
"Model registration failed because the specified uri is invalid: {}".format(
msg
)
)


class UnsupportedError(_BaseError):
def __init__(self, msg: str):
self.message = "{0} is not supported in current version".format(msg)
class BuiltInModelDeletionException(_BaseException):
def __init__(self, model_id: str):
super().__init__()
self.message = "Cannot delete built-in model: {}".format(model_id)


class InvalidUriError(_BaseError):
def __init__(self, uri: str):
self.message = "Invalid uri: {}, there are no {} or {} under this uri.".format(
uri, MODEL_WEIGHTS_FILE_IN_PT, MODEL_CONFIG_FILE_IN_YAML
class BadConfigValueException(_BaseException):
def __init__(self, config_name: str, config_value, hint: str = ""):
super().__init__()
self.message = "Bad value [{0}] for config {1}. {2}".format(
config_value, config_name, hint
)


class InvalidWindowArgumentError(_BaseError):
def __init__(self, window_interval, window_step, dataset_length):
self.message = f"Invalid inference input: window_interval {window_interval}, window_step {window_step}, dataset_length {dataset_length}"


class InferenceModelInternalError(_BaseError):
class InferenceModelInternalException(_BaseException):
def __init__(self, msg: str):
super().__init__()
self.message = "Inference model internal error: {0}".format(msg)


class BuiltInModelNotSupportError(_BaseError):
class BuiltInModelNotSupportException(_BaseException):
def __init__(self, msg: str):
super().__init__()
self.message = "Built-in model not support: {0}".format(msg)


class BuiltInModelDeletionError(_BaseError):
def __init__(self, model_id: str):
self.message = "Cannot delete built-in model: {0}".format(model_id)


class WrongAttributeTypeError(_BaseError):
class WrongAttributeTypeException(_BaseException):
def __init__(self, attribute_name: str, expected_type: str):
super().__init__()
self.message = "Wrong type for attribute: {0}, expected: {1}".format(
attribute_name, expected_type
)


class NumericalRangeException(_BaseError):
class NumericalRangeException(_BaseException):
def __init__(self, attribute_name: str, value, min_value, max_value):
super().__init__()
self.message = (
"Attribute {0} expect value between {1} and {2}, got {3} instead.".format(
attribute_name, min_value, max_value, value
)
)


class StringRangeException(_BaseError):
class StringRangeException(_BaseException):
def __init__(self, attribute_name: str, value: str, expect_value):
super().__init__()
self.message = "Attribute {0} expect value in {1}, got {2} instead.".format(
attribute_name, expect_value, value
)


class ListRangeException(_BaseError):
class ListRangeException(_BaseException):
def __init__(self, attribute_name: str, value: list, expected_type: str):
super().__init__()
self.message = (
"Attribute {0} expect value type list[{1}], got {2} instead.".format(
attribute_name, expected_type, value
)
)


class AttributeNotSupportError(_BaseError):
def __init__(self, model_name: str, attribute_name: str):
self.message = "Attribute {0} is not supported in model {1}".format(
attribute_name, model_name
)


# This is used to extract the key message in RuntimeError instead of the traceback message
def runtime_error_extractor(error_message):
pattern = re.compile(r"RuntimeError: (.+)")
match = pattern.search(error_message)

if match:
return match.group(1)
else:
return ""
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.
#

from iotdb.ainode.core.exception import InferenceModelInternalError
from iotdb.ainode.core.exception import InferenceModelInternalException
from iotdb.ainode.core.inference.dispatcher.abstract_dispatcher import (
AbstractDispatcher,
)
Expand All @@ -41,7 +41,7 @@ def _select_pool_by_hash(self, req, pool_ids) -> int:
"""
model_id = req.model_id
if not pool_ids:
raise InferenceModelInternalError(
raise InferenceModelInternalException(
f"No available pools for model {model_id}"
)
start_idx = hash(req.req_id) % len(pool_ids)
Expand All @@ -51,7 +51,7 @@ def _select_pool_by_hash(self, req, pool_ids) -> int:
state = self.pool_states[pool_id]
if state == PoolState.RUNNING:
return pool_id
raise InferenceModelInternalError(
raise InferenceModelInternalException(
f"No RUNNING pools available for model {model_id}"
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

import torch.multiprocessing as mp

from iotdb.ainode.core.exception import InferenceModelInternalError
from iotdb.ainode.core.exception import InferenceModelInternalException
from iotdb.ainode.core.inference.inference_request import (
InferenceRequest,
InferenceRequestProxy,
Expand Down Expand Up @@ -374,7 +374,7 @@ def add_request(self, req: InferenceRequest, infer_proxy: InferenceRequestProxy)
if not self.has_request_pools(model_id):
logger.error(f"[Inference] No pools found for model {model_id}.")
infer_proxy.set_result(None)
raise InferenceModelInternalError(
raise InferenceModelInternalException(
"Dispatch request failed, because no inference pools are init."
)
# TODO: Implement adaptive scaling based on requests (e.g. lazy initialization).
Expand Down
6 changes: 3 additions & 3 deletions iotdb-core/ainode/iotdb/ainode/core/inference/pool_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import torch.multiprocessing as mp

from iotdb.ainode.core.exception import InferenceModelInternalError
from iotdb.ainode.core.exception import InferenceModelInternalException
from iotdb.ainode.core.inference.dispatcher.basic_dispatcher import BasicDispatcher
from iotdb.ainode.core.inference.inference_request import (
InferenceRequest,
Expand Down Expand Up @@ -90,14 +90,14 @@ def dispatch_request(

def get_request_pool(self, pool_id) -> InferenceRequestPool:
if pool_id not in self.pool_group:
raise InferenceModelInternalError(
raise InferenceModelInternalException(
f"[Inference][Pool-{pool_id}] Pool not found for model {self.model_id}"
)
return self.pool_group[pool_id][0]

def get_request_queue(self, pool_id) -> mp.Queue:
if pool_id not in self.pool_group:
raise InferenceModelInternalError(
raise InferenceModelInternalException(
f"[Inference][Pool-{pool_id}] Pool not found for model {self.model_id}"
)
return self.pool_group[pool_id][1]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import torch

from iotdb.ainode.core.exception import InferenceModelInternalError
from iotdb.ainode.core.exception import InferenceModelInternalException
from iotdb.ainode.core.inference.pool_group import PoolGroup
from iotdb.ainode.core.inference.pool_scheduler.abstract_pool_scheduler import (
AbstractPoolScheduler,
Expand Down Expand Up @@ -113,7 +113,7 @@ def schedule(self, model_id: str) -> List[ScaleAction]:
if model_id not in self._request_pool_map:
pool_num = estimate_pool_size(self.DEFAULT_DEVICE, model_id)
if pool_num <= 0:
raise InferenceModelInternalError(
raise InferenceModelInternalException(
f"Not enough memory to run model {model_id}."
)
return [ScaleAction(ScaleActionType.SCALE_UP, pool_num, model_id)]
Expand Down
Loading