Skip to content

Commit a9a8932

Browse files
[JENKINS-75827] - Deadlock on KubernetesProvisioningLimits during initialization (#1730)
1 parent 39d8132 commit a9a8932

File tree

1 file changed

+79
-71
lines changed

1 file changed

+79
-71
lines changed

src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesProvisioningLimits.java

Lines changed: 79 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
import hudson.ExtensionList;
66
import hudson.model.Node;
77
import hudson.model.Queue;
8-
import java.util.HashMap;
9-
import java.util.Map;
8+
import java.util.concurrent.ConcurrentHashMap;
9+
import java.util.concurrent.ConcurrentMap;
10+
import java.util.concurrent.atomic.AtomicBoolean;
1011
import java.util.logging.Level;
1112
import java.util.logging.Logger;
1213
import jenkins.metrics.api.Metrics;
1314
import jenkins.model.Jenkins;
1415
import jenkins.model.NodeListener;
15-
import net.jcip.annotations.GuardedBy;
1616
import org.kohsuke.accmod.Restricted;
1717
import org.kohsuke.accmod.restrictions.NoExternalUse;
1818

@@ -23,41 +23,44 @@
2323
public final class KubernetesProvisioningLimits {
2424
private static final Logger LOGGER = Logger.getLogger(KubernetesProvisioningLimits.class.getName());
2525

26-
@GuardedBy("this")
27-
private boolean init;
26+
private final AtomicBoolean init = new AtomicBoolean();
2827

2928
/**
3029
* Tracks current number of kubernetes agents per pod template
3130
*/
32-
private final Map<String, Integer> podTemplateCounts = new HashMap<>();
31+
private final ConcurrentMap<String, Integer> podTemplateCounts = new ConcurrentHashMap<>();
3332

3433
/**
3534
* Tracks current number of kubernetes agents per kubernetes cloud
3635
*/
37-
private final Map<String, Integer> cloudCounts = new HashMap<>();
36+
private final ConcurrentMap<String, Integer> cloudCounts = new ConcurrentHashMap<>();
3837

3938
/**
4039
* Initialize limits counter
4140
* @return whether the instance was already initialized before this call.
4241
*/
43-
private synchronized boolean initInstance() {
44-
boolean previousInit = init;
45-
if (!init) {
42+
private boolean initInstance() {
43+
if (init.compareAndSet(false, true)) {
4644
Queue.withLock(() -> {
47-
Jenkins.get().getNodes().stream()
48-
.filter(KubernetesSlave.class::isInstance)
49-
.map(KubernetesSlave.class::cast)
50-
.forEach(node -> {
51-
cloudCounts.put(
52-
node.getCloudName(), getGlobalCount(node.getCloudName()) + node.getNumExecutors());
53-
podTemplateCounts.put(
54-
node.getTemplateId(),
55-
getPodTemplateCount(node.getTemplateId()) + node.getNumExecutors());
56-
});
45+
synchronized (this) {
46+
Jenkins.get().getNodes().stream()
47+
.filter(KubernetesSlave.class::isInstance)
48+
.map(KubernetesSlave.class::cast)
49+
.forEach(node -> {
50+
cloudCounts.put(
51+
node.getCloudName(),
52+
getGlobalCount(node.getCloudName()) + node.getNumExecutors());
53+
podTemplateCounts.put(
54+
node.getTemplateId(),
55+
getPodTemplateCount(node.getTemplateId()) + node.getNumExecutors());
56+
});
57+
}
5758
});
58-
init = true;
59+
60+
return false;
61+
} else {
62+
return true;
5963
}
60-
return previousInit;
6164
}
6265

6366
/**
@@ -73,40 +76,43 @@ public static KubernetesProvisioningLimits get() {
7376
* @param podTemplate the pod template used to schedule the agent
7477
* @param numExecutors the number of executors (pretty much always 1)
7578
*/
76-
public synchronized boolean register(
77-
@NonNull KubernetesCloud cloud, @NonNull PodTemplate podTemplate, int numExecutors) {
79+
public boolean register(@NonNull KubernetesCloud cloud, @NonNull PodTemplate podTemplate, int numExecutors) {
7880
initInstance();
79-
int newGlobalCount = getGlobalCount(cloud.name) + numExecutors;
80-
if (newGlobalCount <= cloud.getContainerCap()) {
81-
int newPodTemplateCount = getPodTemplateCount(podTemplate.getId()) + numExecutors;
82-
if (newPodTemplateCount <= podTemplate.getInstanceCap()) {
83-
cloudCounts.put(cloud.name, newGlobalCount);
84-
LOGGER.log(
85-
Level.FINEST,
86-
() -> cloud.name + " global limit: " + newGlobalCount + "/" + cloud.getContainerCap());
81+
synchronized (this) {
82+
int newGlobalCount = getGlobalCount(cloud.name) + numExecutors;
83+
if (newGlobalCount <= cloud.getContainerCap()) {
84+
int newPodTemplateCount = getPodTemplateCount(podTemplate.getId()) + numExecutors;
85+
if (newPodTemplateCount <= podTemplate.getInstanceCap()) {
86+
cloudCounts.put(cloud.name, newGlobalCount);
87+
LOGGER.log(
88+
Level.FINEST,
89+
() -> cloud.name + " global limit: " + newGlobalCount + "/" + cloud.getContainerCap());
8790

88-
podTemplateCounts.put(podTemplate.getId(), newPodTemplateCount);
89-
LOGGER.log(
90-
Level.FINEST,
91-
() -> podTemplate.getName() + " template limit: " + newPodTemplateCount + "/"
92-
+ podTemplate.getInstanceCap());
93-
return true;
91+
podTemplateCounts.put(podTemplate.getId(), newPodTemplateCount);
92+
LOGGER.log(
93+
Level.FINEST,
94+
() -> podTemplate.getName() + " template limit: " + newPodTemplateCount + "/"
95+
+ podTemplate.getInstanceCap());
96+
return true;
97+
} else {
98+
LOGGER.log(
99+
Level.FINEST,
100+
() -> podTemplate.getName() + " template limit reached: "
101+
+ getPodTemplateCount(podTemplate.getId()) + "/" + podTemplate.getInstanceCap()
102+
+ ". Cannot add " + numExecutors + " more!");
103+
Metrics.metricRegistry()
104+
.counter(MetricNames.REACHED_POD_CAP)
105+
.inc();
106+
}
94107
} else {
95108
LOGGER.log(
96109
Level.FINEST,
97-
() -> podTemplate.getName() + " template limit reached: "
98-
+ getPodTemplateCount(podTemplate.getId()) + "/" + podTemplate.getInstanceCap()
99-
+ ". Cannot add " + numExecutors + " more!");
100-
Metrics.metricRegistry().counter(MetricNames.REACHED_POD_CAP).inc();
110+
() -> cloud.name + " global limit reached: " + getGlobalCount(cloud.name) + "/"
111+
+ cloud.getContainerCap() + ". Cannot add " + numExecutors + " more!");
112+
Metrics.metricRegistry().counter(MetricNames.REACHED_GLOBAL_CAP).inc();
101113
}
102-
} else {
103-
LOGGER.log(
104-
Level.FINEST,
105-
() -> cloud.name + " global limit reached: " + getGlobalCount(cloud.name) + "/"
106-
+ cloud.getContainerCap() + ". Cannot add " + numExecutors + " more!");
107-
Metrics.metricRegistry().counter(MetricNames.REACHED_GLOBAL_CAP).inc();
114+
return false;
108115
}
109-
return false;
110116
}
111117

112118
/**
@@ -115,33 +121,35 @@ public synchronized boolean register(
115121
* @param podTemplate the pod template used to schedule the agent
116122
* @param numExecutors the number of executors (pretty much always 1)
117123
*/
118-
public synchronized void unregister(
119-
@NonNull KubernetesCloud cloud, @NonNull PodTemplate podTemplate, int numExecutors) {
124+
public void unregister(@NonNull KubernetesCloud cloud, @NonNull PodTemplate podTemplate, int numExecutors) {
120125
if (initInstance()) {
121-
int newGlobalCount = getGlobalCount(cloud.name) - numExecutors;
122-
if (newGlobalCount < 0) {
126+
synchronized (this) {
127+
int newGlobalCount = getGlobalCount(cloud.name) - numExecutors;
128+
if (newGlobalCount < 0) {
129+
LOGGER.log(
130+
Level.WARNING,
131+
"Global count for " + cloud.name
132+
+ " went below zero. There is likely a bug in kubernetes-plugin");
133+
}
134+
cloudCounts.put(cloud.name, Math.max(0, newGlobalCount));
123135
LOGGER.log(
124-
Level.WARNING,
125-
"Global count for " + cloud.name
126-
+ " went below zero. There is likely a bug in kubernetes-plugin");
127-
}
128-
cloudCounts.put(cloud.name, Math.max(0, newGlobalCount));
129-
LOGGER.log(
130-
Level.FINEST,
131-
() -> cloud.name + " global limit: " + Math.max(0, newGlobalCount) + "/" + cloud.getContainerCap());
136+
Level.FINEST,
137+
() -> cloud.name + " global limit: " + Math.max(0, newGlobalCount) + "/"
138+
+ cloud.getContainerCap());
132139

133-
int newPodTemplateCount = getPodTemplateCount(podTemplate.getId()) - numExecutors;
134-
if (newPodTemplateCount < 0) {
140+
int newPodTemplateCount = getPodTemplateCount(podTemplate.getId()) - numExecutors;
141+
if (newPodTemplateCount < 0) {
142+
LOGGER.log(
143+
Level.WARNING,
144+
"Pod template count for " + podTemplate.getName()
145+
+ " went below zero. There is likely a bug in kubernetes-plugin");
146+
}
147+
podTemplateCounts.put(podTemplate.getId(), Math.max(0, newPodTemplateCount));
135148
LOGGER.log(
136-
Level.WARNING,
137-
"Pod template count for " + podTemplate.getName()
138-
+ " went below zero. There is likely a bug in kubernetes-plugin");
149+
Level.FINEST,
150+
() -> podTemplate.getName() + " template limit: " + Math.max(0, newPodTemplateCount) + "/"
151+
+ podTemplate.getInstanceCap());
139152
}
140-
podTemplateCounts.put(podTemplate.getId(), Math.max(0, newPodTemplateCount));
141-
LOGGER.log(
142-
Level.FINEST,
143-
() -> podTemplate.getName() + " template limit: " + Math.max(0, newPodTemplateCount) + "/"
144-
+ podTemplate.getInstanceCap());
145153
}
146154
}
147155

0 commit comments

Comments
 (0)