Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ private RESTCatalogProperties() {}

public static final String NAMESPACE_SEPARATOR = "namespace-separator";

// Enable planning on the REST server side
public static final String REST_SCAN_PLANNING_ENABLED = "rest-scan-planning-enabled";
public static final boolean REST_SCAN_PLANNING_ENABLED_DEFAULT = false;
// Configure scan planning mode
// Can be set by server in LoadTableResponse.config() for table-level override
public static final String SCAN_PLANNING_MODE = "scan-planning-mode";

public static final String REST_SCAN_PLAN_ID = "rest-scan-plan-id";

Expand All @@ -59,4 +59,38 @@ public enum SnapshotMode {
ALL,
REFS
}

/**
* Enum to represent scan planning mode.
*
* <ul>
* <li>CLIENT - Use client-side scan planning
* <li>SERVER - Use server-side scan planning
* </ul>
*/
public enum ScanPlanningMode {
CLIENT("client"),
SERVER("server");

private final String modeName;

ScanPlanningMode(String modeName) {
this.modeName = modeName;
}

public String modeName() {
return modeName;
}

public static ScanPlanningMode fromString(String mode) {
for (ScanPlanningMode planningMode : values()) {
if (planningMode.modeName.equalsIgnoreCase(mode)) {
return planningMode;
}
}

throw new IllegalArgumentException(
String.format("Invalid scan planning mode: %s. Valid values are: client, server", mode));
}
}
}
52 changes: 39 additions & 13 deletions core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ public class RESTSessionCatalog extends BaseViewSessionCatalog
private MetricsReporter reporter = null;
private boolean reportingViaRestEnabled;
private Integer pageSize = null;
private boolean restScanPlanningEnabled;
private CloseableGroup closeables = null;
private Set<Endpoint> endpoints;
private Supplier<Map<String, String>> mutationHeaders = Map::of;
Expand Down Expand Up @@ -281,12 +280,6 @@ public void initialize(String name, Map<String, String> unresolved) {
RESTCatalogProperties.NAMESPACE_SEPARATOR,
RESTUtil.NAMESPACE_SEPARATOR_URLENCODED_UTF_8);

this.restScanPlanningEnabled =
PropertyUtil.propertyAsBoolean(
mergedProps,
RESTCatalogProperties.REST_SCAN_PLANNING_ENABLED,
RESTCatalogProperties.REST_SCAN_PLANNING_ENABLED_DEFAULT);

this.tableCache = createTableCache(mergedProps);
this.closeables.addCloseable(this.tableCache);

Expand Down Expand Up @@ -584,7 +577,7 @@ private Supplier<BaseTable> createTableSupplier(

trackFileIO(ops);

RESTTable table = restTableForScanPlanning(ops, identifier, tableClient);
RESTTable table = restTableForScanPlanning(ops, identifier, tableClient, tableConf);
if (table != null) {
return table;
}
Expand All @@ -595,9 +588,40 @@ private Supplier<BaseTable> createTableSupplier(
}

private RESTTable restTableForScanPlanning(
TableOperations ops, TableIdentifier finalIdentifier, RESTClient restClient) {
// server supports remote planning endpoint and server / client wants to do server side planning
if (endpoints.contains(Endpoint.V1_SUBMIT_TABLE_SCAN_PLAN) && restScanPlanningEnabled) {
TableOperations ops,
TableIdentifier finalIdentifier,
RESTClient restClient,
Map<String, String> tableConf) {
// Get client-side and server-side scan planning modes
String planningModeClientConfig = properties().get(RESTCatalogProperties.SCAN_PLANNING_MODE);
String planningModeServerConfig = tableConf.get(RESTCatalogProperties.SCAN_PLANNING_MODE);

// Validate that client and server configs don't conflict
Copy link
Contributor

@amogh-jahagirdar amogh-jahagirdar Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a choice here. We can either hard fail on conflicting configs like is done below or we can just let one override the other.

I think the least surprising thing from a client perspective is to just override using the client config in case of conflicts. On average I don't expect people to be fiddling with this config. In the chance they do, they're probably intentionally trying to set it and we should attempt to respect that. Even if there's some FGAC and creds and all aren't returned and it fails later on, that's better to me.

I guess I could go either way here (as long as we log there's a conflict and we're overriding using some mechanism), but I think any approach seems better than just hard failing since that seems unneccessary.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the least surprising thing from a client perspective is to just override using the client config in case of conflicts

It can cause an issue in a sense that let say i am low resource client i can't run scan planning at all, so i say use catalog and the catalog says do client ? i believe if we let catalog judgement win it may lead to failure ?
But i think other way to look at it we any way override the client config with the server one if server says for some configs we can do the same then here ?
I am also open to both !

Copy link
Contributor Author

@singhpk234 singhpk234 Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Talked to Amogh a bit more on this (I will add this to upcoming catalog sync agenda as well). They suggested It seems like there is a precedence for it foe example we define in config endpoint where overrides sent by server are applied on top of the client side configs, but can this be a special case in a sense, what if server always says do server mode and the client says otherwise, not letting client choose client side planning ?

One other option could be define a client only config for client to enforce this

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t really understand the motivation for client-side configuration here. Based on the specification:

- `scan-planning-mode`: Controls scan planning behavior for table operations. Valid values:
  - `client` (default): Clients MUST use client-side scan planning
  - `server`: Clients MUST use server-side scan planning

my interpretation is that the server fully determines the scan planning mode. If it specifies server, then server-side planning MUST be used; if it specifies client, or does not specify anything, then client-side planning MUST be used.

In this case, don’t really understand the motivation for having a client‑side configuration for scan planning.

The server provided scan-planning-mode controls how scan planning is performed for table operations. In this case, there is no remaining decision or configuration surface on the client side. If the client specifies a different value than the server, we simply fail (do we need a config just for failing?).

The only scenario in which client‑side specification could meaningful is when the server does not explicitly specify a planning mode but does support server‑side planning. In that case, the client may be able to choose between client‑side and server‑side planning. This, however, is not what the current spec appears to describe.

If we want to support this behavior, it should be stated explicitly in the specification: the client may decide only when the server has not specified a planning mode.
For example, the spec could say something like:

- `scan-planning-mode`: Controls scan planning behavior for table operations. Valid values:
  - `client`: Clients MUST use client-side scan planning
  - `server`: Clients MUST use server-side scan planning
  - If not specified, then the client is free to choose, provided that server-side planning is available

Is this the expected behavior?

Copy link
Contributor Author

@singhpk234 singhpk234 Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don’t really understand the motivation for having a client‑side configuration for scan planning

I believe this is stemming from the config section of the loadTable response in general, even though we specify some catalog level config it gets over-riden based on server response (its implicit) for example the file IO creds send in the config as well as things like client.region get overriden

## General Configurations

The client side config is mostly from POV can a client specify its choice on if they wanna do server side planning or not.

If the client specifies a different value than the server, we simply fail (do we need a config just for failing?)

yes that what i have for now, (but it seems like its kind of conflicts with the existing behaviour where server just override the client configs) if they are two different stuff then fail because we don't know want the server to just override the client, and client can't disobey server MUST. this will act as a fence in which client doesn't wanna do server side planning and server can't force it (may be scan planning perf is suboptimal / cost of this endpoint is high). I mostly looking for handling this case. would really appreciate your thoughts on this case ?

I agree like in nothing specified by the server case, client wins in any-case, thats true even today not sure if we wanna spec it out thought.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t have a strong opinion for how we consolidate the server- and client‑side configs, other than that we should try to be consistent with how other configuration behaviors are defined.

My main concern is the following part of the spec:

- `client` (default): Clients MUST use client-side scan planning

What exactly is the reason for calling this the “default”?
Does this mean that if the server sends no scan-planning-mode at all, the client must treat it as client? If so, that implies the client should fail whenever it attempts server‑side planning but the server didn’t return a scan-planning-mode.

So we need to align either the spec or the implementation.
If we want to preserve the current code behavior, but avoid specifying semantics for the case where the server sends nothing, then we should remove the word “default,” for example:

- `scan-planning-mode`: Controls scan planning behavior for table operations. Valid values:
  - `client`: Clients MUST use client-side scan planning
  - `server`: Clients MUST use server-side scan planning

Alternatively, if we do want to retain “default,” then we need to update the implementation so that an absent server value is treated exactly the same as if the server had explicitly returned client.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some suggestions to the spec to hopefully clarify this

Copy link
Contributor Author

@singhpk234 singhpk234 Feb 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the feedback Peter & Russell

Alternatively, if we do want to retain “default,” then we need to update the implementation so that an absent server value is treated exactly the same as if the server had explicitly returned client.

This is what i doing in the implementation presently, absence of any value is as if server returned client per here

per your feedbacks seems like we want to get rid of "default", but what if the client doesn't specify anything do we infer "default" is client, i know this is implicit as, this is what happens today, just wanted to be strict in spec POV, but I am totally open to removing "default" (removed it per suggestion) and in case client doesn't specify a client side config implicty assume its client and leave it to the client impl what there default in this case is

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we're aligned on how the configs should be interpreted, we just describe it a bit differently.
The spec is correct, the code is correct; the only difference is in our mental model 😄

To summarize, the configuration precedence for planning mode is:

  1. Server config
  2. Client config
  3. If neither is set → default to client

From my point of view, this default naturally belongs to the client configuration, not the server. So there’s no need to include it explicitly in the specification; unless we start defining client‑side configs in the spec as well, which we shouldn’t.

This is how it is now, so I'm fine with the current solution, giving my +1

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@amogh-jahagirdar how are you feeling about the error condition now. I am kind of thinking that if there is a mis match we should throw an error, but i'm ok with also just doing a warning

"Catalog responded with planning mode : x, this client is configured to use : y . Using y may be unsupported by the catalog. If the planning fails either remove the planning mode configuration from the client, or switch the mode on the client to x"

Wdyt?

// Only validate if BOTH are explicitly set (not null)
if (planningModeClientConfig != null && planningModeServerConfig != null) {
Preconditions.checkState(
planningModeClientConfig.equalsIgnoreCase(planningModeServerConfig),
"Scan planning mode mismatch for table %s: client config=%s, server config=%s",
finalIdentifier,
planningModeClientConfig,
planningModeServerConfig);
}

// Determine effective mode: prefer server config if present, otherwise use client config
String effectiveModeConfig =
planningModeServerConfig != null ? planningModeServerConfig : planningModeClientConfig;
RESTCatalogProperties.ScanPlanningMode effectiveMode =
effectiveModeConfig != null
? RESTCatalogProperties.ScanPlanningMode.fromString(effectiveModeConfig)
: RESTCatalogProperties.ScanPlanningMode.CLIENT;

if (effectiveMode == RESTCatalogProperties.ScanPlanningMode.SERVER) {
Preconditions.checkState(
endpoints.contains(Endpoint.V1_SUBMIT_TABLE_SCAN_PLAN),
"Server requires server-side scan planning for table %s but does not support endpoint %s",
finalIdentifier,
Endpoint.V1_SUBMIT_TABLE_SCAN_PLAN);

return new RESTTable(
ops,
fullTableName(finalIdentifier),
Expand All @@ -610,6 +634,8 @@ private RESTTable restTableForScanPlanning(
properties(),
conf);
}

// Default to client-side planning
return null;
}

Expand Down Expand Up @@ -683,7 +709,7 @@ public Table registerTable(

trackFileIO(ops);

RESTTable restTable = restTableForScanPlanning(ops, ident, tableClient);
RESTTable restTable = restTableForScanPlanning(ops, ident, tableClient, tableConf);
if (restTable != null) {
return restTable;
}
Expand Down Expand Up @@ -952,7 +978,7 @@ public Table create() {

trackFileIO(ops);

RESTTable restTable = restTableForScanPlanning(ops, ident, tableClient);
RESTTable restTable = restTableForScanPlanning(ops, ident, tableClient, tableConf);
if (restTable != null) {
return restTable;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public void before() throws Exception {
httpServer.setHandler(servletContext);
httpServer.start();

restCatalog = initCatalog(catalogName(), ImmutableMap.of());
restCatalog = initCatalog(catalogName(), additionalCatalogProperties());
}

@AfterEach
Expand Down Expand Up @@ -119,6 +119,10 @@ public <T extends RESTResponse> T execute(

protected abstract String catalogName();

protected Map<String, String> additionalCatalogProperties() {
return ImmutableMap.of();
}

@SuppressWarnings("unchecked")
protected <T> T roundTripSerialize(T payload, String description) {
if (payload != null) {
Expand Down
Loading