diff --git a/debug_hive.py b/debug_hive.py
new file mode 100644
index 000000000000..d5be92268663
--- /dev/null
+++ b/debug_hive.py
@@ -0,0 +1,83 @@
+"""Diagnose connectivity to the Hive server at ``host``:``port``.
+
+Runs three checks in order and prints one result line per check: DNS
+resolution of the host, a raw TCP connection to the configured port, and
+PyHive connections using NOSASL and then the library's default auth mode.
+
+Only a DNS failure aborts the run (exit status 1); every later check is
+best-effort so that a single run reports as much as possible.
+"""
+import socket
+import sys
+
+host = 'hive-server'
+port = 10000
+TCP_TIMEOUT_SECONDS = 5
+
+
+def resolve_host():
+    """Resolve ``host`` to an IPv4 address; exit with status 1 on failure."""
+    try:
+        ip = socket.gethostbyname(host)
+    except OSError as e:  # socket.gaierror is an OSError subclass
+        print(f"DNS Failed: {e}")
+        sys.exit(1)
+    print(f"DNS Resolved: {host} -> {ip}")
+    return ip
+
+
+def check_tcp(ip):
+    """Open a TCP connection to ``ip`` on ``port`` and print the outcome."""
+    try:
+        # The context manager closes the socket even when connect() raises.
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            s.settimeout(TCP_TIMEOUT_SECONDS)
+            s.connect((ip, port))
+            print(f"TCP Connection Successful to {ip}:{port}")
+    except OSError as e:  # covers timeouts and refused connections
+        # Not fatal: the PyHive checks still run and report their own errors.
+        print(f"TCP Connection Failed: {e}")
+
+
+def check_pyhive(hive, label, **auth_kwargs):
+    """Attempt one PyHive connection and print the outcome under ``label``.
+
+    ``hive`` is the imported ``pyhive.hive`` module; ``auth_kwargs`` are extra
+    keyword arguments for ``hive.Connection`` (e.g. ``auth='NOSASL'``).
+    """
+    try:
+        conn = hive.Connection(
+            host=host, port=port, username='hive', **auth_kwargs
+        )
+        print(f"PyHive Connection ({label}): SUCCESS")
+        conn.close()
+    except Exception as e:
+        # Broad on purpose: this is the reporting boundary of a diagnostic
+        # script, and every failure should become one printed line.
+        print(f"PyHive Connection ({label}) Failed: {e}")
+
+
+def main():
+    """Run the DNS, TCP and PyHive checks in order."""
+    print(f"Testing connectivity to {host}:{port}...")
+
+    # Test 1: DNS Resolution
+    ip = resolve_host()
+
+    # Test 2: TCP Socket
+    check_tcp(ip)
+
+    # Test 3: PyHive Connection (Minimal)
+    print("Testing PyHive Connection...")
+    try:
+        from pyhive import hive
+    except ImportError as e:
+        # Report a missing driver once instead of once per auth mode.
+        print(f"PyHive import failed: {e}")
+        return
+    check_pyhive(hive, "NOSASL", auth='NOSASL')  # Try NOSASL first
+    check_pyhive(hive, "Default")  # Default auth
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docker-compose-non-dev.yml b/docker-compose-non-dev.yml index 5d221ab602c1..e4cc35992c83 100644 --- a/docker-compose-non-dev.yml +++ b/docker-compose-non-dev.yml @@ -25,15 +25,10 @@ # - Set SUPERSET_LOG_LEVEL=debug in docker/.env-local for detailed Superset logs # ----------------------------------------------------------------------- x-superset-volumes: - &superset-volumes # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container + &superset-volumes - ./docker:/app/docker - superset_home:/app/superset_home -x-common-build: &common-build - context: .
- target: dev - cache_from: - - apache/superset-cache:3.10-slim-trixie services: redis: @@ -42,12 +37,14 @@ services: restart: unless-stopped volumes: - redis:/data + networks: + - databricks-net db: env_file: - - path: docker/.env # default + - path: docker/.env required: true - - path: docker/.env-local # optional override + - path: docker/.env-local required: false image: postgres:16 container_name: superset_db @@ -55,15 +52,22 @@ services: volumes: - db_home:/var/lib/postgresql/data - ./docker/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d + networks: + - databricks-net superset: env_file: - - path: docker/.env # default + - path: docker/.env required: true - - path: docker/.env-local # optional override + - path: docker/.env-local required: false + environment: + SUPERSET_ENV: production + DEV_MODE: "false" build: - <<: *common-build + context: ./docker + dockerfile: Dockerfile.hive + image: superset-hive:latest container_name: superset_app command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"] user: "root" @@ -74,17 +78,22 @@ services: superset-init: condition: service_completed_successfully volumes: *superset-volumes + networks: + - databricks-net superset-init: container_name: superset_init - build: - <<: *common-build + image: superset-hive:latest + command: ["/app/docker/docker-init.sh"] env_file: - - path: docker/.env # default + - path: docker/.env required: true - - path: docker/.env-local # optional override + - path: docker/.env-local required: false + environment: + SUPERSET_ENV: production + DEV_MODE: "false" depends_on: db: condition: service_started @@ -92,25 +101,31 @@ services: condition: service_started user: "root" volumes: *superset-volumes + networks: + - databricks-net healthcheck: disable: true superset-worker: - build: - <<: *common-build + image: superset-hive:latest container_name: superset_worker command: ["/app/docker/docker-bootstrap.sh", "worker"] env_file: - - path: docker/.env # default + - path: docker/.env 
required: true - - path: docker/.env-local # optional override + - path: docker/.env-local required: false + environment: + SUPERSET_ENV: production + DEV_MODE: "false" restart: unless-stopped depends_on: superset-init: condition: service_completed_successfully user: "root" volumes: *superset-volumes + networks: + - databricks-net healthcheck: test: [ @@ -119,21 +134,25 @@ services: ] superset-worker-beat: - build: - <<: *common-build + image: superset-hive:latest container_name: superset_worker_beat command: ["/app/docker/docker-bootstrap.sh", "beat"] env_file: - - path: docker/.env # default + - path: docker/.env required: true - - path: docker/.env-local # optional override + - path: docker/.env-local required: false + environment: + SUPERSET_ENV: production + DEV_MODE: "false" restart: unless-stopped depends_on: superset-init: condition: service_completed_successfully user: "root" volumes: *superset-volumes + networks: + - databricks-net healthcheck: disable: true @@ -144,3 +163,8 @@ volumes: external: false redis: external: false + +networks: + databricks-net: + name: databricks-net + external: true \ No newline at end of file diff --git a/docker/Dockerfile.hive b/docker/Dockerfile.hive new file mode 100644 index 000000000000..7f149cffb617 --- /dev/null +++ b/docker/Dockerfile.hive @@ -0,0 +1,16 @@ +FROM apache/superset:latest + +USER root + +# Install system dependencies for thrift-sasl +RUN apt-get update -q \ + && apt-get install -yq --no-install-recommends \ + libsasl2-dev \ + build-essential \ + libpq-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install python dependencies inside the virtual environment using uv +RUN uv pip install --system --python /app/.venv pyhive thrift thrift-sasl psycopg2-binary + +USER superset diff --git a/docker/docker-bootstrap.sh b/docker/docker-bootstrap.sh index 58d71d25c250..279e055137b7 100755 --- a/docker/docker-bootstrap.sh +++ b/docker/docker-bootstrap.sh @@ -42,7 +42,7 @@ if [ "$CYPRESS_CONFIG" == "true" ]; then 
PORT=8081 fi # Skip postgres requirements installation for workers to avoid conflicts -if [[ "$DATABASE_DIALECT" == postgres* ]] && [ "$(whoami)" = "root" ] && [ "$1" != "worker" ] && [ "$1" != "beat" ]; then +if [[ "$DATABASE_DIALECT" == postgres* ]] && [ "$(whoami)" = "root" ] && [ "$1" != "worker" ] && [ "$1" != "beat" ] && [ "$SUPERSET_ENV" != "production" ]; then # older images may not have the postgres dev requirements installed echo "Installing postgres requirements" if command -v uv > /dev/null 2>&1; then diff --git a/docker/docker-init.sh b/docker/docker-init.sh index e4b25b5b187f..d3556a9684e3 100755 --- a/docker/docker-init.sh +++ b/docker/docker-init.sh @@ -16,6 +16,8 @@ # limitations under the License. # set -e +exec > >(tee -a /app/docker/debug.log) 2>&1 + # # Always install local overrides first