@@ -67,53 +67,34 @@ def convert(
6767 else :
6868 sources = "[" + "," .join ([f'"{ url } "' for url in urls ]) + "]"
6969
70- _collection = self .create_collection (cid )
71- _collection .update (self .column_additions )
72- _collection ["collection" ] = self .id
73- collection = json .dumps (_collection , cls = VecorelJSONEncoder ).encode ("utf-8" )
74-
75- schemas = _collection .merge_schemas ({})
76- props = schemas .get ("properties" , {})
77- required = schemas .get ("required" , [])
78- pq_fields = []
79- for column in self .columns .values ():
80- schema = props .get (column , {})
81- dtype = schema .get ("type" )
82- if dtype is None :
83- self .warning (f"{ column } : No mapping" )
84- continue
85- try :
86- field = get_pyarrow_field (column , schema = schema , required = column in required )
87- pq_fields .append (field )
88- except Exception as e :
89- self .warning (f"{ column } : Skipped - { e } " )
70+ collection = self .create_collection (cid )
71+ collection .update (self .column_additions )
72+ collection ["collection" ] = self .id
9073
9174 if isinstance (output_file , Path ):
9275 output_file = str (output_file )
9376
94- pq_schema = pa .schema (pq_fields )
95- schema_bytes = pq_schema .serialize ().to_pybytes ()
96- # pq_schema = pq_schema.with_metadata({"collection": collection})
77+ collection_json = json .dumps (collection , cls = VecorelJSONEncoder ).encode ("utf-8" )
9778
9879 con = duckdb .connect ()
9980 con .install_extension ("spatial" )
10081 con .load_extension ("spatial" )
10182 con .execute (
10283 f"""
10384 COPY (
104- SELECT { selection } FROM read_parquet({ sources } , union_by_name=true)
85+ SELECT { selection }
86+ FROM read_parquet({ sources } , union_by_name=true)
10587 { where }
10688 ORDER BY ST_Hilbert({ geom_column } )
10789 ) TO ? (
10890 FORMAT parquet,
109- compression 'brotli ',
91+ compression '{ compression } ',
11092 KV_METADATA {{
11193 collection: ?,
112- "PYARROW:schema": ?
11394 }}
11495 )
11596 """ ,
116- [output_file , collection , schema_bytes ],
97+ [output_file , collection_json ],
11798 )
11899
119100 return output_file
0 commit comments