HDFS-17869: chain the original exception as the cause of IOExceptions thrown
from QuorumJournalManager.

Every catch (InterruptedException) / catch (TimeoutException) handler touched
below now passes the caught exception to the IOException constructor, so the
original stack trace is preserved. The doRollback() handlers also stop
reporting themselves as "doFinalize()". A unit test covers the
TimeoutException path of format().

NOTE(review): this patch was reconstructed from a copy whose line breaks were
collapsed and whose generic type arguments were stripped. Indentation and the
whitespace on blank / whitespace-only lines are best-effort; re-generate
against the tree or apply with --ignore-whitespace if a hunk is rejected.
NOTE(review): the InterruptedException handlers shown here still do not
restore the thread's interrupt status; consider that as a follow-up.

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
index faf71a7b545d9..180970f1cffc9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
@@ -255,9 +255,9 @@ public void format(NamespaceInfo nsInfo, boolean force) throws IOException {
       call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "format");
     } catch (InterruptedException e) {
-      throw new IOException("Interrupted waiting for format() response");
+      throw new IOException("Interrupted waiting for format() response", e);
     } catch (TimeoutException e) {
-      throw new IOException("Timed out waiting for format() response");
+      throw new IOException("Timed out waiting for format() response", e);
     }
 
     if (call.countExceptions() > 0) {
@@ -273,9 +273,9 @@ public boolean hasSomeData() throws IOException {
     try {
       call.waitFor(loggers.size(), 0, 0, timeoutMs, "hasSomeData");
     } catch (InterruptedException e) {
-      throw new IOException("Interrupted while determining if JNs have data");
+      throw new IOException("Interrupted while determining if JNs have data", e);
     } catch (TimeoutException e) {
-      throw new IOException("Timed out waiting for response from loggers");
+      throw new IOException("Timed out waiting for response from loggers", e);
     }
 
     if (call.countExceptions() > 0) {
@@ -676,9 +676,9 @@ public void doPreUpgrade() throws IOException {
         call.rethrowException("Could not do pre-upgrade of one or more JournalNodes");
       }
     } catch (InterruptedException e) {
-      throw new IOException("Interrupted waiting for doPreUpgrade() response");
+      throw new IOException("Interrupted waiting for doPreUpgrade() response", e);
     } catch (TimeoutException e) {
-      throw new IOException("Timed out waiting for doPreUpgrade() response");
+      throw new IOException("Timed out waiting for doPreUpgrade() response", e);
     }
   }
 
@@ -693,9 +693,9 @@ public void doUpgrade(Storage storage) throws IOException {
         call.rethrowException("Could not perform upgrade of one or more JournalNodes");
       }
     } catch (InterruptedException e) {
-      throw new IOException("Interrupted waiting for doUpgrade() response");
+      throw new IOException("Interrupted waiting for doUpgrade() response", e);
     } catch (TimeoutException e) {
-      throw new IOException("Timed out waiting for doUpgrade() response");
+      throw new IOException("Timed out waiting for doUpgrade() response", e);
     }
   }
 
@@ -710,12 +710,12 @@ public void doFinalize() throws IOException {
         call.rethrowException("Could not finalize one or more JournalNodes");
       }
     } catch (InterruptedException e) {
-      throw new IOException("Interrupted waiting for doFinalize() response");
+      throw new IOException("Interrupted waiting for doFinalize() response", e);
     } catch (TimeoutException e) {
-      throw new IOException("Timed out waiting for doFinalize() response");
+      throw new IOException("Timed out waiting for doFinalize() response", e);
     }
   }
-  
+
   @Override
   public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
       int targetLayoutVersion) throws IOException {
@@ -742,10 +742,10 @@ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
       }
     } catch (InterruptedException e) {
       throw new IOException("Interrupted waiting for lockSharedStorage() " +
-          "response");
+          "response", e);
     } catch (TimeoutException e) {
       throw new IOException("Timed out waiting for lockSharedStorage() " +
-          "response");
+          "response", e);
     }
 
     throw new AssertionError("Unreachable code.");
@@ -762,12 +762,12 @@ public void doRollback() throws IOException {
         call.rethrowException("Could not perform rollback of one or more JournalNodes");
       }
     } catch (InterruptedException e) {
-      throw new IOException("Interrupted waiting for doFinalize() response");
+      throw new IOException("Interrupted waiting for doRollback() response", e);
     } catch (TimeoutException e) {
-      throw new IOException("Timed out waiting for doFinalize() response");
+      throw new IOException("Timed out waiting for doRollback() response", e);
     }
   }
-  
+
   @Override
   public void discardSegments(long startTxId) throws IOException {
     QuorumCall<AsyncLogger, Void> call = loggers.discardSegments(startTxId);
@@ -780,10 +780,10 @@ public void discardSegments(long startTxId) throws IOException {
       }
     } catch (InterruptedException e) {
       throw new IOException(
-          "Interrupted waiting for discardSegments() response");
+          "Interrupted waiting for discardSegments() response", e);
     } catch (TimeoutException e) {
       throw new IOException(
-          "Timed out waiting for discardSegments() response");
+          "Timed out waiting for discardSegments() response", e);
     }
   }
 
@@ -811,10 +811,10 @@ public long getJournalCTime() throws IOException {
       }
     } catch (InterruptedException e) {
       throw new IOException("Interrupted waiting for getJournalCTime() " +
-          "response");
+          "response", e);
     } catch (TimeoutException e) {
       throw new IOException("Timed out waiting for getJournalCTime() " +
-          "response");
+          "response", e);
     }
 
     throw new AssertionError("Unreachable code.");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java
index 613854b325b7f..7d79b2ebd2ca8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java
@@ -19,6 +19,8 @@
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.fail;
 import static org.mockito.ArgumentMatchers.any;
@@ -374,6 +376,53 @@ private GetJournaledEditsResponseProto getJournaledEditsReponse(
         .build();
   }
 
+  /**
+   * HDFS-17869: IOException thrown from catch(InterruptedException) and
+   * catch(TimeoutException) blocks in QuorumJournalManager must chain the
+   * original exception as the cause so that stack traces are not lost.
+   *
+   * Verifies the TimeoutException path by using a very short operations
+   * timeout combined with loggers that never resolve their format() future.
+   */
+  @Test
+  public void testFormatTimeoutExceptionIsCauseChained() throws Exception {
+    Configuration shortTimeoutConf = new Configuration(conf);
+    // Set an extremely short operations timeout so waitFor() times out quickly.
+    shortTimeoutConf.set(DFSConfigKeys.DFS_QJM_OPERATIONS_TIMEOUT, "50ms");
+
+    List<AsyncLogger> neverResolvingLoggers = ImmutableList.of(
+        mockLogger(), mockLogger(), mockLogger());
+
+    QuorumJournalManager shortQjm = new QuorumJournalManager(
+        shortTimeoutConf, new URI("qjournal://host/jid"), FAKE_NSINFO) {
+      @Override
+      protected List<AsyncLogger> createLoggers(AsyncLogger.Factory factory) {
+        return neverResolvingLoggers;
+      }
+    };
+
+    // Make every logger return a future that never completes, so waitFor() is
+    // forced to time out.
+    for (AsyncLogger logger : neverResolvingLoggers) {
+      SettableFuture<Void> neverDone = SettableFuture.create();
+      Mockito.doReturn(neverDone).when(logger)
+          .format(Mockito.any(), anyBoolean());
+    }
+
+    try {
+      IOException ioe = assertThrows(IOException.class,
+          () -> shortQjm.format(FAKE_NSINFO, true),
+          "Expected IOException from timeout");
+      assertNotNull(ioe.getCause(),
+          "IOException must chain the root cause (HDFS-17869)");
+      assertInstanceOf(java.util.concurrent.TimeoutException.class,
+          ioe.getCause(),
+          "Root cause must be the TimeoutException, not swallowed");
+    } finally {
+      shortQjm.close();
+    }
+  }
+
   private EditLogOutputStream createLogSegment() throws IOException {
     futureReturns(null).when(spyLoggers.get(0)).startLogSegment(Mockito.anyLong(),
         Mockito.eq(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION));