Skip to content

Commit 86dfa61

Browse files
robzor92 and javierdlrm authored
[HWORKS-483] Add creating status for deployments (#1344) (#1332) (#1333)
Co-authored-by: Javier de la Rúa Martínez <[email protected]>
1 parent 49be562 commit 86dfa61

File tree

6 files changed

+75
-32
lines changed

6 files changed

+75
-32
lines changed

hopsworks-IT/src/test/ruby/spec/helpers/serving_helper.rb

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ def delete_all_servings(project_id)
124124
end
125125

126126
def start_serving(project, serving)
127+
#Stopped status can happen if the deployment is reused, which is the case in some of the tests
128+
wait_for_serving_status(project, serving[:name], ["Created", "Stopped"])
127129
post "#{ENV['HOPSWORKS_API']}/project/#{project[:id]}/serving/#{serving[:id]}?action=start"
128130
expect_status_details(200)
129131
end
@@ -201,18 +203,18 @@ def parse_inference_logging(value)
201203
end
202204
end
203205

204-
# Fetches the serving list of the given project and returns the entry whose
# name matches +serving_name+.
#
# @param project [Hash] project whose :id is interpolated into the request path
# @param serving_name [String] value compared against each entry's 'name' key
# @return [Hash, nil] the first matching serving, or nil when none matches
def get_serving(project, serving_name)
  serving_list = get "#{ENV['HOPSWORKS_API']}/project/#{project[:id]}/serving/"
  expect_status_details(200)
  # `find` stops at the first match instead of building an intermediate array.
  JSON.parse(serving_list).find { |serving| serving['name'] == serving_name }
end
210212

211213
# Polls a serving until its status is one of the accepted values, then asserts
# that the last observed status is accepted.
#
# @param project [Hash] project owning the serving (forwarded to get_serving)
# @param serving_name [String] name of the serving to look up
# @param accepted_status [Array<String>, String] status or statuses that end the wait
# @param timeout [Integer] forwarded to wait_for_me_time (presumably seconds — confirm)
# @param delay [Integer] forwarded to wait_for_me_time (presumably seconds — confirm)
def wait_for_serving_status(project, serving_name, accepted_status, timeout: 180, delay: 10)
  # Accept a single status string as well as a list of statuses.
  accepted = Array(accepted_status)
  wait_result = wait_for_me_time(timeout, delay) do
    result = get_serving(project, serving_name)
    # get_serving returns nil when no serving matches; treat that as "not yet".
    current = result && result['status']
    { 'success' => accepted.include?(current), 'status' => current }
  end
  # A bare expect(...) without a matcher asserts nothing; use a real matcher.
  expect(accepted).to include(wait_result["status"])
end
218220
end

hopsworks-IT/src/test/ruby/spec/serving_spec.rb

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,16 @@
926926
delete_all_servings(@project[:id])
927927
end
928928

929+
# Starting is only rejected while the deployment is still in "Creating"; when
# creation has already finished the scenario cannot be exercised, so skip.
it "should fail to start a deployment that is being created" do
  result = get_serving(@project, @serving[:name])
  skip "Serving already created" unless result['status'] == "Creating"

  post "#{ENV['HOPSWORKS_API']}/project/#{@project[:id]}/serving/#{@serving[:id]}?action=start"
  expect_status_details(400, error_code: 240003)
end
938+
929939
it "should be able to start a serving instance" do
930940
start_serving(@project, @serving)
931941
wait_for_type(@serving[:name])
@@ -1006,22 +1016,32 @@
10061016
with_tensorflow_serving(@project[:id], @project[:projectname], @user[:username])
10071017

10081018
start_serving(@project, @serving)
1009-
wait_for_serving_status(@serving[:name], "Running")
1019+
wait_for_serving_status(@project, @serving[:name], ["Running"])
10101020
end
10111021

10121022
after :all do
10131023
purge_all_tf_serving_instances
10141024
delete_all_servings(@project[:id])
10151025
end
10161026

1017-
it "should be able to kill a running serving instance" do
1027+
# Stopping is only rejected while the deployment is still in "Creating"; when
# creation has already finished the scenario cannot be exercised, so skip.
it "should fail to stop a deployment that is being created" do
  result = get_serving(@project, @serving[:name])
  skip "Serving already created" unless result['status'] == "Creating"

  post "#{ENV['HOPSWORKS_API']}/project/#{@project[:id]}/serving/#{@serving[:id]}?action=stop"
  expect_status_details(400, error_code: 240003)
end
1036+
1037+
it "should be able to stop a running serving instance" do
10181038
post "#{ENV['HOPSWORKS_API']}/project/#{@project[:id]}/serving/#{@serving[:id]}?action=stop"
10191039
expect_status_details(200)
10201040

1021-
wait_for_serving_status(@serving[:name], "Stopped")
1041+
wait_for_serving_status(@project, @serving[:name], ["Stopped"])
10221042
end
10231043

1024-
it "should fail to kill a non running instance" do
1044+
it "should fail to stop a non running instance" do
10251045
# serving is already stopped
10261046

10271047
post "#{ENV['HOPSWORKS_API']}/project/#{@project[:id]}/serving/#{@serving[:id]}?action=stop"
@@ -1034,7 +1054,7 @@
10341054
end
10351055

10361056
start_serving(@project, @serving)
1037-
wait_for_serving_status(@serving[:name], "Running")
1057+
wait_for_serving_status(@project, @serving[:name], ["Running"])
10381058

10391059
# Simulate the process dying by its own
10401060
system "pgrep -f tensorflow_model_server | xargs kill -9"
@@ -1093,6 +1113,15 @@
10931113
delete_all_servings(@project[:id])
10941114
end
10951115

1116+
# Deleting must succeed even while the deployment is still in "Creating".
it "should delete a deployment that is being created" do
  result = get_serving(@project, @serving[:name])
  # Decide whether the scenario applies BEFORE issuing the destructive request,
  # so a skipped example leaves the serving in place for the examples that follow.
  skip "Serving already created" unless result['status'] == "Creating"

  delete "#{ENV['HOPSWORKS_API']}/project/#{@project[:id]}/serving/#{@serving[:id]}"
  expect_status_details(200)
end
1124+
10961125
it "should delete a serving instance" do
10971126
delete "#{ENV['HOPSWORKS_API']}/project/#{@project[:id]}/serving/#{@serving[:id]}"
10981127
expect_status_details(200)
@@ -1149,20 +1178,23 @@
11491178
before :all do
11501179
with_valid_project
11511180
copy_mnist_files(@project[:projectname], @user[:username])
1152-
create_tensorflow_serving(@project[:id], @project[:projectname])
1153-
create_tensorflow_serving(@project[:id], @project[:projectname])
1154-
@tf_serving = create_tensorflow_serving(@project[:id], @project[:projectname])
1181+
@tf_serving1 = create_tensorflow_serving(@project[:id], @project[:projectname])
1182+
@tf_serving2 = create_tensorflow_serving(@project[:id], @project[:projectname])
1183+
@tf_serving3 = create_tensorflow_serving(@project[:id], @project[:projectname])
11551184
end
11561185

11571186
# Lists every serving of the project without a status filter.
it "should return all servings" do
  get_servings(@project, nil)
  expect_status_details(200)
  # Freshly created deployments may still report "Creating" when listed.
  json_body.each { |model| expect(%w[Creating Created]).to include(model[:status]) }
  expect(json_body.length).to eq 3
end
11631192

11641193
describe "#status" do
11651194
it "should return all servings in created state" do
1195+
wait_for_serving_status(@project, @tf_serving1[:name], ["Created"])
1196+
wait_for_serving_status(@project, @tf_serving2[:name], ["Created"])
1197+
wait_for_serving_status(@project, @tf_serving3[:name], ["Created"])
11661198
get_servings(@project, "?status=Created")
11671199
expect_status_details(200)
11681200
json_body.each {|model| expect(model[:status]).to eq "Created"}
@@ -1176,8 +1208,8 @@
11761208
end
11771209

11781210
it "should return single running serving" do
1179-
start_serving(@project, @tf_serving)
1180-
wait_for_serving_status(@tf_serving[:name], "Running")
1211+
start_serving(@project, @tf_serving3)
1212+
wait_for_serving_status(@project, @tf_serving3[:name], ["Running"])
11811213

11821214
get_servings(@project, "?status=Running")
11831215
expect_status_details(200)

hopsworks-common/src/main/java/io/hops/hopsworks/common/security/QuotasEnforcement.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,10 @@ private List<ServingWrapper> getAllServings(Project project) throws ServingExcep
180180
}
181181

182182
private boolean isDeploymentRunning(ServingWrapper serving) {
183-
return serving.getStatus().equals(ServingStatusEnum.STARTING)
184-
|| serving.getStatus().equals(ServingStatusEnum.FAILED)
185-
|| serving.getStatus().equals(ServingStatusEnum.RUNNING)
186-
|| serving.getStatus().equals(ServingStatusEnum.IDLE)
187-
|| serving.getStatus().equals(ServingStatusEnum.UPDATING);
183+
return !serving.getStatus().equals(ServingStatusEnum.CREATING)
184+
&& !serving.getStatus().equals(ServingStatusEnum.CREATED)
185+
&& !serving.getStatus().equals(ServingStatusEnum.STOPPING)
186+
&& !serving.getStatus().equals(ServingStatusEnum.STOPPED);
188187
}
189188

190189
private void enforceFeaturegroupsQuotaInternal(Featurestore featurestore, List<Featuregroup> featuregroups,

hopsworks-common/src/main/java/io/hops/hopsworks/common/serving/ServingStatusCondition.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public class ServingStatusCondition {
3737
public static final String UnscheduledInProgress = "Waiting for being unscheduled";
3838
public static final String StoppedInProgress = "Stopping deployment";
3939
public static final String StoppedSuccess = "Deployment is not running";
40+
public static final String StoppedCreating = "Deployment is being prepared";
4041

4142
public ServingStatusCondition() {
4243
}
@@ -130,4 +131,8 @@ public static ServingStatusCondition getStoppedInProgressCondition() {
130131
public static ServingStatusCondition getStoppedSuccessCondition() {
131132
return new ServingStatusCondition(ServingStatusConditionEnum.STOPPED, true, StoppedSuccess);
132133
}
134+
135+
public static ServingStatusCondition getStoppedCreatingCondition() {
136+
return new ServingStatusCondition(ServingStatusConditionEnum.STOPPED, StoppedCreating);
137+
}
133138
}

hopsworks-common/src/main/java/io/hops/hopsworks/common/serving/ServingStatusEnum.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
import com.fasterxml.jackson.annotation.JsonValue;
4444

4545
public enum ServingStatusEnum {
46+
CREATING("Creating"),
4647
CREATED("Created"),
4748
STARTING("Starting"),
4849
FAILED("Failed"),

hopsworks-common/src/test/io/hops/hopsworks/common/security/TestQuotasEnforcement.java

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -222,36 +222,40 @@ public void testQuotasRunningModelDeployments() throws Exception {
222222
List<ServingWrapper> mockServings = new ArrayList<>();
223223

224224
ServingWrapper sw0 = new ServingWrapper(new Serving());
225-
sw0.setStatus(ServingStatusEnum.CREATED);
225+
sw0.setStatus(ServingStatusEnum.CREATING);
226226
mockServings.add(sw0);
227227

228228
ServingWrapper sw1 = new ServingWrapper(new Serving());
229-
sw1.setStatus(ServingStatusEnum.STARTING);
229+
sw1.setStatus(ServingStatusEnum.CREATED);
230230
mockServings.add(sw1);
231231

232232
ServingWrapper sw2 = new ServingWrapper(new Serving());
233-
sw2.setStatus(ServingStatusEnum.FAILED);
233+
sw2.setStatus(ServingStatusEnum.STARTING);
234234
mockServings.add(sw2);
235235

236236
ServingWrapper sw3 = new ServingWrapper(new Serving());
237-
sw3.setStatus(ServingStatusEnum.RUNNING);
237+
sw3.setStatus(ServingStatusEnum.FAILED);
238238
mockServings.add(sw3);
239-
239+
240240
ServingWrapper sw4 = new ServingWrapper(new Serving());
241-
sw4.setStatus(ServingStatusEnum.IDLE);
241+
sw4.setStatus(ServingStatusEnum.RUNNING);
242242
mockServings.add(sw4);
243-
243+
244244
ServingWrapper sw5 = new ServingWrapper(new Serving());
245-
sw5.setStatus(ServingStatusEnum.UPDATING);
245+
sw5.setStatus(ServingStatusEnum.IDLE);
246246
mockServings.add(sw5);
247247

248248
ServingWrapper sw6 = new ServingWrapper(new Serving());
249-
sw6.setStatus(ServingStatusEnum.STOPPING);
249+
sw6.setStatus(ServingStatusEnum.UPDATING);
250250
mockServings.add(sw6);
251-
251+
252252
ServingWrapper sw7 = new ServingWrapper(new Serving());
253-
sw7.setStatus(ServingStatusEnum.STOPPED);
253+
sw7.setStatus(ServingStatusEnum.STOPPING);
254254
mockServings.add(sw7);
255+
256+
ServingWrapper sw8 = new ServingWrapper(new Serving());
257+
sw8.setStatus(ServingStatusEnum.STOPPED);
258+
mockServings.add(sw8);
255259

256260
ServingController servingController = Mockito.mock(ServingController.class);
257261
Mockito.when(servingController.getAll(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any())).thenReturn(mockServings);

0 commit comments

Comments
 (0)