diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java index 57c84aaf41b1..2e190f60f89a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.TimeUnit; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.common.cloud.ZkNodeProps; @@ -61,7 +60,10 @@ public void testSimpleSliceLeaderElection() throws Exception { String collection = "collection1"; createCollection(collection); - cluster.waitForActiveCollection(collection, 10, TimeUnit.SECONDS, 2, 6); + waitForState( + "Timeout waiting for collection to become active", + collection, + clusterShape(2, NUM_REPLICAS_OF_SHARD1 + 1)); List stoppedRunners = new ArrayList<>(); for (int i = 0; i < 4; i++) { // who is the leader? @@ -107,15 +109,20 @@ public void testSimpleSliceLeaderElection() throws Exception { assertNotNull(jetty); cluster.expireZkSession(jetty); - for (int i = 0; i < 60; i++) { // wait till leader is changed - if (jetty != getRunner(getLeader(collection))) { - break; - } - Thread.sleep(100); - } - - // make sure we have waited long enough for the first leader to have come back - Thread.sleep(ZkTestServer.TICK_TIME * 2 + 100); + // Wait until leadership has moved away from the expired-session node + waitForState( + "Expected leader to move away after expiring zk session", + collection, + c -> { + var l = c.getLeader("shard1"); + return l != null && !jetty.getNodeName().equals(l.getNodeName()); + }); + + // Wait until the expired-session node is live again before stopping others + waitForState( + "Expected expired-session node to rejoin live nodes", + collection, + (liveNodes, c) -> liveNodes.contains(jetty.getNodeName())); // kill everyone but the first leader that should have reconnected by now for (JettySolrRunner jetty2 : cluster.getJettySolrRunners()) { @@ -124,18 +131,13 @@ public void testSimpleSliceLeaderElection() throws Exception { } } - for (int i = 0; i < 320; i++) { // wait till leader is changed - try { - if (jetty == getRunner(getLeader(collection))) { - break; - } - Thread.sleep(100); - } catch (Exception e) { - continue; - } - } - - assertEquals(jetty, getRunner(getLeader(collection))); + waitForState( + "Expected original node to become leader after others stopped", + collection, + c -> { + var l = c.getLeader("shard1"); + return l != null && jetty.getNodeName().equals(l.getNodeName()); + }); } private JettySolrRunner getRunner(String nodeName) { diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkFailoverTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkFailoverTest.java index d97fdfe84bee..36febd0495c8 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ZkFailoverTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ZkFailoverTest.java @@ -23,9 +23,7 @@ import org.apache.solr.client.solrj.request.SolrQuery; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.embedded.JettySolrRunner; -import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.slf4j.Logger; @@ -60,12 +58,15 @@ public void testRestartZkWhenClusterDown() throws Exception { // This attempt will fail since it will time out after 1 second System.setProperty("solr.cloud.wait.for.zk.seconds", "1"); restartSolrAndZk(); - waitForLiveNodes(0); + waitForState("Timeout waiting for 0 live nodes", coll, (liveNodes, c) -> liveNodes.isEmpty()); // This attempt will succeed since there will be enough time to connect System.setProperty("solr.cloud.wait.for.zk.seconds", "20"); restartSolrAndZk(); - waitForLiveNodes(cluster.getJettySolrRunners().size()); + waitForState( + "Timeout waiting for all nodes to come up", + coll, + (liveNodes, c) -> liveNodes.size() == cluster.getJettySolrRunners().size()); waitForState("Timeout waiting for " + coll, coll, clusterShape(2, 2)); QueryResponse rsp = new QueryRequest(new SolrQuery("*:*")).process(cluster.getSolrClient(), coll); @@ -99,14 +100,4 @@ private void restartSolrAndZk() throws Exception { thread.join(); } } - - private void waitForLiveNodes(int numNodes) throws InterruptedException, KeeperException { - ZkStateReader zkStateReader = cluster.getZkStateReader(); - for (int i = 0; i < 100; i++) { - zkStateReader.updateLiveNodes(); - if (zkStateReader.getClusterState().getLiveNodes().size() == numNodes) return; - Thread.sleep(200); - } - fail("Timeout waiting for number of live nodes = " + numNodes); - } } diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java index f52b0fce8db1..ac1e9177fd02 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java @@ -52,14 +52,20 @@ public static void setupCluster() throws Exception { CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS) .process(cluster.getSolrClient()) .getStatus()); - cluster.waitForActiveCollection(COLLECTION, 10, TimeUnit.SECONDS, 2, NUM_SHARDS * NUM_REPLICAS); + waitForState( + "Timeout waiting for collection to be active after creation", + COLLECTION, + clusterShape(NUM_SHARDS, NUM_SHARDS * NUM_REPLICAS)); } @Before public void waitForActiveState() throws Exception { CollectionAdminRequest.modifyCollection(COLLECTION, Map.of("readOnly", false)) .process(cluster.getSolrClient()); - cluster.waitForActiveCollection(COLLECTION, 10, TimeUnit.SECONDS, 2, NUM_SHARDS * NUM_REPLICAS); + waitForState( + "Timeout waiting for active collection", + COLLECTION, + clusterShape(NUM_SHARDS, NUM_SHARDS * NUM_REPLICAS)); } @Test