Skip to content

Commit 3cf3b8d

Browse files
committed
Export collection and set compression
1 parent 852fb6a commit 3cf3b8d

File tree

1 file changed

+8
-27
lines changed

1 file changed

+8
-27
lines changed

fiboa_cli/conversion/duckdb.py

Lines changed: 8 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -67,53 +67,34 @@ def convert(
6767
else:
6868
sources = "[" + ",".join([f'"{url}"' for url in urls]) + "]"
6969

70-
_collection = self.create_collection(cid)
71-
_collection.update(self.column_additions)
72-
_collection["collection"] = self.id
73-
collection = json.dumps(_collection, cls=VecorelJSONEncoder).encode("utf-8")
74-
75-
schemas = _collection.merge_schemas({})
76-
props = schemas.get("properties", {})
77-
required = schemas.get("required", [])
78-
pq_fields = []
79-
for column in self.columns.values():
80-
schema = props.get(column, {})
81-
dtype = schema.get("type")
82-
if dtype is None:
83-
self.warning(f"{column}: No mapping")
84-
continue
85-
try:
86-
field = get_pyarrow_field(column, schema=schema, required=column in required)
87-
pq_fields.append(field)
88-
except Exception as e:
89-
self.warning(f"{column}: Skipped - {e}")
70+
collection = self.create_collection(cid)
71+
collection.update(self.column_additions)
72+
collection["collection"] = self.id
9073

9174
if isinstance(output_file, Path):
9275
output_file = str(output_file)
9376

94-
pq_schema = pa.schema(pq_fields)
95-
schema_bytes = pq_schema.serialize().to_pybytes()
96-
# pq_schema = pq_schema.with_metadata({"collection": collection})
77+
collection_json = json.dumps(collection, cls=VecorelJSONEncoder).encode("utf-8")
9778

9879
con = duckdb.connect()
9980
con.install_extension("spatial")
10081
con.load_extension("spatial")
10182
con.execute(
10283
f"""
10384
COPY (
104-
SELECT {selection} FROM read_parquet({sources}, union_by_name=true)
85+
SELECT {selection}
86+
FROM read_parquet({sources}, union_by_name=true)
10587
{where}
10688
ORDER BY ST_Hilbert({geom_column})
10789
) TO ? (
10890
FORMAT parquet,
109-
compression 'brotli',
91+
compression '{compression}',
11092
KV_METADATA {{
11193
collection: ?,
112-
"PYARROW:schema": ?
11394
}}
11495
)
11596
""",
116-
[output_file, collection, schema_bytes],
97+
[output_file, collection_json],
11798
)
11899

119100
return output_file

0 commit comments

Comments
 (0)