Skip to content

Commit 49be562

Browse files
authored
[HWORKS-512] Read cgroup parent from database (#1347) (#1329) (#1334)
* Python resources fixes * Remove empty line * Take care of the . * Rename variable
1 parent 6631232 commit 49be562

File tree

2 files changed

+25
-7
lines changed

2 files changed

+25
-7
lines changed

hopsworks-common/src/main/java/io/hops/hopsworks/common/pythonresources/PythonResourcesController.java

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -152,25 +152,34 @@ private Map<String, String> getNodeExporterQueriesKube() {
152152
}
153153

154154
private Map<String, String> getCadvisorQueries() {
155+
final String dockerCgroupParent = settings.getDockerCgroupParent();
156+
String cGroupParentForRegex = dockerCgroupParent.replaceAll("\\.", "\\\\\\\\.");
157+
final String headNodeInstanceQuery = "instance='" + glassfishIp + ":" + nodeExporterPort + "'";
155158
return new HashMap<String, String>() {
156159
{
157160
put(DOCKER_CURRENT_CPU_USAGE_KEY,
158-
"sum(avg by (cpu) (rate(container_cpu_usage_seconds_total{id=~'.*/docker/.*'}[60s]) * 100))");
161+
"sum(avg by (cpu) (rate(container_cpu_usage_seconds_total{id=~'.*/" + cGroupParentForRegex
162+
+ "/.*'}[60s]) * 100))");
159163
put(DOCKER_TOTAL_ALLOCATABLE_CPU_KEY,
160-
"(container_spec_cpu_quota{id='/docker'}/" + settings.getDockerCgroupCpuPeriod() + ")*100");
161-
put(DOCKER_CURRENT_MEMORY_USAGE_KEY, "sum(container_memory_working_set_bytes{id=~'.*/docker/.*'})");
162-
put(DOCKER_TOTAL_ALLOCATABLE_MEMORY_KEY, "container_spec_memory_limit_bytes{id='/docker'}");
164+
"(sum(container_spec_cpu_quota{id='/" + dockerCgroupParent + "'})/("
165+
+ settings.getDockerCgroupCpuPeriod()
166+
+ " * (count(count(node_cpu_seconds_total{" + headNodeInstanceQuery + "}) without (mode,instance,job))"
167+
+ "without (cpu))))*100");
168+
put(DOCKER_CURRENT_MEMORY_USAGE_KEY, "sum(container_memory_working_set_bytes{id=~'.*/" + cGroupParentForRegex
169+
+ "/.*'})");
170+
put(DOCKER_TOTAL_ALLOCATABLE_MEMORY_KEY, "container_spec_memory_limit_bytes{id='/" + dockerCgroupParent
171+
+ "'}");
163172
}
164173
};
165174
}
166175

167176
private Map<String, String> getNodeExporterQueriesHeadNode() {
168-
String headNodeQuery = "instance='" + glassfishIp + ":" + nodeExporterPort + "'";
177+
final String headNodeQuery = "instance='" + glassfishIp + ":" + nodeExporterPort + "'";
169178
return new HashMap<String, String>() {
170179
{
171180
put(CLUSTER_CURRENT_CPU_USAGE,
172-
"100 - ((sum((avg by (instance) (rate(node_cpu_seconds_total{mode='idle', " + headNodeQuery + "}[1m])) " +
173-
"* 100)))/(count(node_memory_Active_bytes{" + headNodeQuery + "})))");
181+
"100 - ((sum((avg by (instance) (rate(node_cpu_seconds_total{mode='idle', " + headNodeQuery + "}[1m])) "
182+
+ "* 100)))/(count(node_memory_Active_bytes{" + headNodeQuery + "})))");
174183
put(CLUSTER_CURRENT_MEMORY_USAGE, "sum(node_memory_Active_bytes{" + headNodeQuery + "})");
175184
put(CLUSTER_TOTAL_MEMORY_CAPACITY, "sum(node_memory_MemTotal_bytes{" + headNodeQuery + "})");
176185
}

hopsworks-common/src/main/java/io/hops/hopsworks/common/util/Settings.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,7 @@ public class Settings implements Serializable {
405405
private static final String VARIABLE_DOCKER_CGROUP_CPU_QUOTA = "docker_cgroup_cpu_quota_percentage";
406406
private static final String VARIABLE_DOCKER_CGROUP_CPU_PERIOD = "docker_cgroup_cpu_period";
407407
private static final String VARIABLE_DOCKER_CGROUP_MONITOR_INTERVAL = "docker_cgroup_monitor_interval";
408+
private static final String VARIABLE_DOCKER_CGROUP_PARENT = "docker_cgroup_parent";
408409

409410
private static final String VARIABLE_PROMETHEUS_PORT = "prometheus_port";
410411

@@ -902,6 +903,7 @@ private void populateCache() {
902903
ENABLE_GIT_READ_ONLY_REPOSITORIES = setBoolVar(VARIABLE_ENABLE_GIT_READ_ONLY_REPOSITORIES,
903904
ENABLE_GIT_READ_ONLY_REPOSITORIES);
904905

906+
//Docker cgroups
905907
DOCKER_CGROUP_ENABLED = setBoolVar(VARIABLE_DOCKER_CGROUP_ENABLED, DOCKER_CGROUP_ENABLED);
906908
DOCKER_CGROUP_MEMORY_LIMIT = setStrVar(VARIABLE_DOCKER_CGROUP_HARD_LIMIT_MEMORY,
907909
DOCKER_CGROUP_MEMORY_LIMIT);
@@ -911,6 +913,7 @@ private void populateCache() {
911913
DOCKER_CGROUP_CPU_PERIOD = setIntVar(VARIABLE_DOCKER_CGROUP_CPU_PERIOD, DOCKER_CGROUP_CPU_PERIOD);
912914
DOCKER_CGROUP_MONITOR_INTERVAL = setStrVar(VARIABLE_DOCKER_CGROUP_MONITOR_INTERVAL,
913915
DOCKER_CGROUP_MONITOR_INTERVAL);
916+
DOCKER_CGROUP_PARENT = setStrVar(VARIABLE_DOCKER_CGROUP_PARENT, DOCKER_CGROUP_PARENT);
914917

915918
PROMETHEUS_PORT = setIntVar(VARIABLE_PROMETHEUS_PORT, PROMETHEUS_PORT);
916919

@@ -1785,6 +1788,12 @@ public synchronized String getDockerCgroupIntervalMonitor() {
17851788
return DOCKER_CGROUP_MONITOR_INTERVAL;
17861789
}
17871790

1791+
private String DOCKER_CGROUP_PARENT = "docker.slice";
1792+
public synchronized String getDockerCgroupParent() {
1793+
checkCache();
1794+
return DOCKER_CGROUP_PARENT;
1795+
}
1796+
17881797

17891798
// Service key rotation interval
17901799
private static final String JUPYTER_SHUTDOWN_TIMER_INTERVAL = "jupyter_shutdown_timer_interval";

0 commit comments

Comments
 (0)