Skip to content

Commit 34b24a6

Browse files
kameshsampath authored and sfc-gh-abozkurt committed
(feat!): Improve Local Developer and Quickstart experience
- improve docker builds and reduce final image size
- Remove unneeded mounts in compose file
- add taskfile for building and running the containers
- add docker quickstart guide
- add local development guide

Signed-off-by: Aykut Bozkurt <[email protected]>
1 parent aa687ad commit 34b24a6

15 files changed

+2053
-118
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,3 +39,5 @@ delete_old_cluster.sh
3939
**/isolation/output
4040
**/isolation/isolation.conf
4141
pg_lake_iceberg/logs/polaris.log
42+
.volume/
43+
Dockerfile.alpine

README.md

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ The connection above is to the pgduck_server on its port (default 5332), NOT to
101101
Once you set up the credential chain, you should set the `pg_lake_iceberg.default_location_prefix`. This is the location where Iceberg tables are stored:
102102

103103
```sql
104-
SET pg_lake_iceberg.default_location_prefix TO 's3://testbucketpglake';
104+
SET pg_lake_iceberg.default_location_prefix TO 's3://testbucket/pglake';
105105
```
106106

107107
You can also set the credentials on `pgduck_server` for [local development with `minio`](docs/building-from-source.md#running-s3-compatible-service-minio-locally).
@@ -137,7 +137,7 @@ SELECT table_name, metadata_location FROM iceberg_tables;
137137

138138
table_name | metadata_location
139139
-------------------+--------------------------------------------------------------------------------------------------------------------
140-
iceberg_test | s3://testbucketpglake/postgres/public/test/435029/metadata/00001-f0c6e20a-fd1c-4645-87c9-c0c64b92992b.metadata.json
140+
iceberg_test | s3://testbucket/pglake/postgres/public/test/435029/metadata/00001-f0c6e20a-fd1c-4645-87c9-c0c64b92992b.metadata.json
141141
```
142142

143143
### COPY to/from S3
@@ -148,11 +148,11 @@ You can import or export data directly using `COPY` in **Parquet**, **CSV**, or
148148
```sql
149149
-- Copy data from Postgres to S3 with format parquet
150150
-- Read from any data source, including iceberg tables, heap tables or any query results
151-
COPY (SELECT * FROM iceberg_test) TO 's3://testbucketpglake/parquet_data/iceberg_test.parquet';
151+
COPY (SELECT * FROM iceberg_test) TO 's3://testbucket/pglake/parquet_data/iceberg_test.parquet';
152152

153153
-- Copy back from S3 to any table in Postgres
154154
-- This example copies into an iceberg table, but could be heap table as well
155-
COPY iceberg_test FROM 's3://testbucketpglake/parquet_data/iceberg_test.parquet';
155+
COPY iceberg_test FROM 's3://testbucket/pglake/parquet_data/iceberg_test.parquet';
156156
```
157157

158158
### Create foreign table for files on s3
@@ -163,7 +163,7 @@ You can create a foreign table directly from a file or set of files without havi
163163
-- use the files under the path, can use * for all files
164164
CREATE FOREIGN TABLE parquet_table()
165165
SERVER pg_lake
166-
OPTIONS (path 's3://testbucketpglake/parquet_data/*.parquet');
166+
OPTIONS (path 's3://testbucket/pglake/parquet_data/*.parquet');
167167

168168
-- note that we infer the columns from the file
169169
\d parquet_table
@@ -173,7 +173,7 @@ OPTIONS (path 's3://testbucketpglake/parquet_data/*.parquet');
173173
key | integer | | | |
174174
val | text | | | |
175175
Server: pg_lake
176-
FDW options: (path 's3://testbucketpglake/parquet_data/*.parquet')
176+
FDW options: (path 's3://testbucket/pglake/parquet_data/*.parquet')
177177

178178
-- and, query it
179179
select count(*) from parquet_table;

docker/.dockerignore

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
# Git
2+
.git
3+
.gitignore
4+
.gitmodules
5+
6+
# Documentation
7+
*.md
8+
!README.md
9+
TASKFILE.md
10+
11+
# Task files
12+
Taskfile.yml
13+
14+
# Docker files
15+
docker-compose.yml
16+
.dockerignore
17+
18+
# CI/CD
19+
../.github
20+
21+
# Build artifacts
22+
*.tar.gz
23+
*.zip
24+
25+
# Logs
26+
*.log
27+
28+
# IDE
29+
.vscode
30+
.idea
31+
*.swp
32+
*.swo
33+
*~
34+
35+
# OS
36+
.DS_Store
37+
Thumbs.db
38+

docker/.env

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
PG_LAKE_REF=main
2-
PG_MAJOR=18
2+
PG_MAJOR=18

docker/Dockerfile

Lines changed: 140 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@ ARG BASE_IMAGE_TAG
99

1010
# Set environment variables for non-interactive installations and PostgreSQL/PostGIS versions
1111
ENV PG_LAKE_REF=main
12-
ENV PG_MAJOR=18
1312
ARG PG16_VERSION=16.10
1413
ARG PG17_VERSION=17.6
1514
ARG PG18_VERSION=18.0
@@ -243,13 +242,11 @@ RUN git clone https://github.com/citusdata/pg_cron.git && \
243242
make install PG_CONFIG=$PGBASEDIR/pgsql-18/bin/pg_config && make clean PG_CONFIG=$PGBASEDIR/pgsql-18/bin/pg_config && \
244243
cd .. && rm -rf pg_cron
245244

246-
ENV PATH=/home/postgres/pgsql-$PG_MAJOR/bin:$PATH
247-
248245
# Install vcpkg as postgres user
249246
ARG VCPKG_VERSION=2025.01.13
250247

251248
RUN git clone https://github.com/Microsoft/vcpkg.git -b $VCPKG_VERSION /home/postgres/vcpkg && \
252-
./vcpkg/bootstrap-vcpkg.sh && \
249+
./vcpkg/bootstrap-vcpkg.sh && \
253250
./vcpkg/vcpkg install azure-identity-cpp azure-storage-blobs-cpp azure-storage-files-datalake-cpp openssl
254251

255252
ENV VCPKG_TOOLCHAIN_PATH="/home/postgres/vcpkg/scripts/buildsystems/vcpkg.cmake"
@@ -259,22 +256,151 @@ FROM dev_base AS base
259256

260257
# Clone pg_lake project
261258
RUN git clone https://github.com/snowflake-labs/pg_lake.git \
262-
--branch ${PG_LAKE_REF} --recurse-submodules /home/postgres/pg_lake
259+
--branch ${PG_LAKE_REF} --recurse-submodules /home/postgres/pg_lake
260+
261+
############## pg_lake_builder - Build all pg_lake extensions ##############
262+
FROM base AS pg_lake_builder
263+
264+
# need to redefine ARGs in each stage
265+
ARG PG_MAJOR=18
263266

264-
############## pg_lake_postgres ##############
265-
FROM base AS pg_lake_postgres
267+
# Set environment variables for the selected PostgreSQL version
268+
ENV PG_MAJOR=${PG_MAJOR}
269+
ENV PATH=/home/postgres/pgsql-${PG_MAJOR}/bin:$PATH
266270

267-
# Install pg_lake
271+
# Install pg_lake extensions (build happens here, not in final image)
268272
RUN cd pg_lake && \
269273
make install-pg_lake_spatial && \
270274
make install-pg_lake_benchmark
271275

272-
RUN initdb -D $PGBASEDIR/pgsql-$PG_MAJOR/data -U postgres --locale=C.UTF-8 --data-checksums
273-
274-
############## pg_duck_server ##############
275-
FROM base AS pgduck_server
276+
############## pgduck_builder - Build duckdb_pglake and pgduck_server ##############
277+
FROM base AS pgduck_builder
276278

279+
# need to redefine ARGs in each stage
280+
ARG PG_MAJOR=18
277281
ARG PGCOMPAT_BUILD_CONFIG=Release
278282

279-
# Install pgduck_server
280-
RUN make install-pgduck_server
283+
# Set environment variables for the selected PostgreSQL version
284+
ENV PG_MAJOR=${PG_MAJOR}
285+
ENV PATH=/home/postgres/pgsql-${PG_MAJOR}/bin:$PATH
286+
287+
# Install pgduck_server (build happens here, not in final image)
288+
RUN cd pg_lake && \
289+
make install-pgduck_server && \
290+
rm -r duckdb_pglake/build
291+
292+
############## runtime_base - Minimal runtime environment ##############
293+
ARG BASE_IMAGE_OS="almalinux"
294+
ARG BASE_IMAGE_TAG="9"
295+
FROM ${BASE_IMAGE_OS}:${BASE_IMAGE_TAG} AS runtime_base
296+
297+
# need to redefine ARGs in each stage
298+
ARG BASE_IMAGE_OS
299+
ARG BASE_IMAGE_TAG
300+
301+
# Install ONLY runtime libraries (no -devel packages, no build tools)
302+
RUN if [ "$BASE_IMAGE_OS" = "almalinux" ]; then \
303+
dnf -y update && \
304+
dnf -y install epel-release && \
305+
dnf config-manager --enable crb && \
306+
dnf -y install --allowerasing \
307+
ca-certificates \
308+
readline \
309+
zlib \
310+
sudo \
311+
nano \
312+
libxml2 \
313+
libxslt \
314+
libicu \
315+
openssl \
316+
geos \
317+
proj \
318+
gdal \
319+
json-c \
320+
protobuf-c \
321+
uuid \
322+
lz4 \
323+
xz \
324+
snappy \
325+
perl \
326+
jansson \
327+
libcurl && \
328+
dnf clean all; \
329+
fi
330+
331+
RUN if [ "$BASE_IMAGE_OS" = "debian" ]; then \
332+
apt-get update \
333+
&& apt-get install -y \
334+
libreadline8 \
335+
zlib1g \
336+
libxml2 \
337+
libxslt1.1 \
338+
libicu72 \
339+
libssl3 \
340+
libgeos-c1v5 \
341+
libproj25 \
342+
libgdal32 \
343+
libjson-c5 \
344+
libprotobuf-c1 \
345+
uuid-runtime \
346+
libossp-uuid16 \
347+
liblz4-1 \
348+
liblzma5 \
349+
libsnappy1v5 \
350+
perl \
351+
libjansson4 \
352+
libcurl4 \
353+
curl \
354+
sudo \
355+
&& apt-get clean && rm -rf /var/lib/apt/lists/*; \
356+
fi
357+
358+
# Create the postgres user with UID 1001
359+
RUN useradd -u 1001 -m -s /bin/bash postgres
360+
RUN echo "postgres ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers.d/postgres
361+
USER 1001:1001
362+
WORKDIR /home/postgres
363+
364+
ENV PGBASEDIR=/home/postgres
365+
366+
############## pg_lake_postgres - Final runtime image (COPY ONLY) ##############
367+
FROM runtime_base AS pg_lake_postgres
368+
369+
# need to redefine ARGs in each stage
370+
ARG PG_MAJOR=18
371+
372+
# Set environment variables for runtime
373+
ENV PG_MAJOR=${PG_MAJOR}
374+
ENV PATH=/home/postgres/pgsql-${PG_MAJOR}/bin:$PATH
375+
376+
# Copy PostgreSQL binaries and libraries for the selected version from dev_base
377+
COPY --from=dev_base --chown=postgres:postgres /home/postgres/pgsql-${PG_MAJOR} /home/postgres/pgsql-${PG_MAJOR}
378+
379+
# Copy pg_lake extensions from pg_lake_builder (they're already installed to pgsql-* directories)
380+
# We need to copy the updated lib and share directories with pg_lake extensions
381+
COPY --from=pg_lake_builder --chown=postgres:postgres /home/postgres/pgsql-${PG_MAJOR}/lib /home/postgres/pgsql-${PG_MAJOR}/lib
382+
COPY --from=pg_lake_builder --chown=postgres:postgres /home/postgres/pgsql-${PG_MAJOR}/share /home/postgres/pgsql-${PG_MAJOR}/share
383+
384+
# Initialize database (lightweight operation, doesn't compile anything)
385+
RUN initdb -D $PGBASEDIR/pgsql-${PG_MAJOR}/data -U postgres --locale=C.UTF-8 --data-checksums
386+
387+
############## pgduck_server - Final runtime image (COPY ONLY) ##############
388+
FROM runtime_base AS pgduck_server
389+
390+
# need to redefine ARGs in each stage
391+
ARG PG_MAJOR=18
392+
393+
# Set environment variables for runtime
394+
ENV PG_MAJOR=${PG_MAJOR}
395+
ENV PATH=/home/postgres/pgsql-${PG_MAJOR}/bin:$PATH
396+
397+
# Copy PostgreSQL binaries and libraries for the selected version
398+
COPY --from=dev_base --chown=postgres:postgres /home/postgres/pgsql-${PG_MAJOR}/bin /home/postgres/pgsql-${PG_MAJOR}/bin
399+
COPY --from=dev_base --chown=postgres:postgres /home/postgres/pgsql-${PG_MAJOR}/lib /home/postgres/pgsql-${PG_MAJOR}/lib
400+
COPY --from=dev_base --chown=postgres:postgres /home/postgres/pgsql-${PG_MAJOR}/share /home/postgres/pgsql-${PG_MAJOR}/share
401+
402+
# Copy pgduck_server binaries and libraries from pgduck_builder
403+
# Note: duckdb_pglake installs libduckdb.so (not duckdb_pglake.so)
404+
COPY --from=pgduck_builder --chown=postgres:postgres /home/postgres/pgsql-${PG_MAJOR}/bin/ /home/postgres/pgsql-${PG_MAJOR}/bin/
405+
COPY --from=pgduck_builder --chown=postgres:postgres /home/postgres/pgsql-${PG_MAJOR}/lib/ /home/postgres/pgsql-${PG_MAJOR}/lib/
406+
COPY --from=pgduck_builder --chown=postgres:postgres /home/postgres/pgsql-${PG_MAJOR}/share/ /home/postgres/pgsql-${PG_MAJOR}/share/

0 commit comments

Comments
 (0)