Skip to content

Commit 6dcbaeb

Browse files
Merge pull request #26 from TRI-ML/step_refactor_verify_paper_results
Step refactor verify paper results
2 parents 4a517ef + 7da25db commit 6dcbaeb

File tree

79 files changed

+1697
-260
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+1697
-260
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Lines changed: 356 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,356 @@
"""Print (most of) the paper results from the camera-ready version.

Loads the evaluation arrays stored under ``tests/eval_data``, runs the
mirrored SAVI, Lai and STEP tests on each of them, and prints every
time-to-decision to the terminal.

Runtime should be on the order of ~5 seconds.
"""

import os
from pathlib import Path

import numpy as np

from sequentialized_barnard_tests import (
    Hypothesis,
    MirroredLaiTest,
    MirroredSaviTest,
    MirroredStepTest,
)

# Location of the evaluation data, relative to the directory of this script.
DATA_SUBPATH = "../../tests/eval_data/"

# Significance levels used for the hardware and the simulation evaluations.
ALPHA_HARDWARE = 0.05
ALPHA_SIMULATION = 0.01

# Seed handed to every STEP test.
STEP_RANDOM_SEED = 42

# Label of the SAVI rows; SAVI results nest their time-to-decision one level
# deeper than Lai / STEP results (see ``_time_to_decision``).
SAVI_LABEL = "SAVI"

# File name of each evaluation array, in the order in which they are loaded.
# The trailing remarks are carried over from the original script.
# NOTE(review): several arrays are marked "must be flipped", yet this script
# passes column 0 and column 1 to the tests unchanged for every array --
# confirm whether a flip is intended here.
EVAL_FILES = {
    "clean_up_spill": "TRI_CLEAN_SPILL_v4.npy",  # Must be flipped for standard form
    "fold_red_towel": "TRI_FOLD_RED_TOWEL.npy",  # ALREADY in standard form
    "sim_spoon_on_towel": "TRI_SIM_SPOON_ON_TOWEL.npy",  # Must be flipped
    "sim_eggplant_in_basket": "TRI_SIM_EGGPLANT_IN_BASKET.npy",  # Must be flipped
    "sim_stack_cube": "TRI_SIM_STACK_CUBE.npy",  # Must be flipped
    "carrot_distribution": "PU_HARDWARE_DISTRIBUTION_SUPPLEMENT.npy",  # Must be flipped
    "carrot_policy": "PU_HARDWARE_POLICY_SUPPLEMENT.npy",  # ALREADY in standard form
}


def _load_eval_data(data_dir: Path) -> dict:
    """Load every array listed in ``EVAL_FILES`` from ``data_dir``."""
    return {key: np.load(data_dir / name) for key, name in EVAL_FILES.items()}


def _calibrated_lai(label: str, n_max: int, alpha: float) -> MirroredLaiTest:
    """Build a mirrored Lai test, calibrate it, and print its value of ``c``."""
    test = MirroredLaiTest(
        alternative=Hypothesis.P0LessThanP1, n_max=n_max, alpha=alpha
    )
    test.calibrate_c()
    print(f"{label} value of c: ", test.c)
    return test


def _step(n_max: int, alpha: float) -> MirroredStepTest:
    """Build a mirrored STEP test using the shared random seed."""
    return MirroredStepTest(
        alternative=Hypothesis.P0LessThanP1,
        n_max=n_max,
        alpha=alpha,
        random_seed=STEP_RANDOM_SEED,
    )


def _run_suite(suite, data, index) -> dict:
    """Run every ``(label, test)`` of ``suite`` on ``data``, in suite order.

    ``index`` selects the rows; column 0 and column 1 of ``data`` are passed
    as the two sequences. The rows are re-indexed for every test, exactly as
    the original script did. Returns ``{label: result}``.
    """
    results = {}
    for label, test in suite:
        results[label] = test.run_on_sequence(data[index, 0], data[index, 1])
    return results


def _time_to_decision(label: str, result):
    """Return the ``"Time"`` entry of ``result`` for the given row label."""
    if label == SAVI_LABEL:
        return result.info["result_for_alternative"].info["Time"]
    return result.info["Time"]


def _print_report(title: str, results: dict, labels) -> None:
    """Print ``title`` followed by one time-to-decision line per label."""
    print()
    print(title)
    print()
    for label in labels:
        print(f"{label} time-to-decision: ", _time_to_decision(label, results[label]))


def main() -> None:
    """Confirm the paper results; prints all results to the terminal."""
    # Set the data path
    script_dir = Path(os.path.abspath(__file__)).parent
    data_dir = (script_dir / DATA_SUBPATH).resolve()

    # Load the paper data
    data = _load_eval_data(data_dir)

    # Load SAVI tests
    savi_hardware = MirroredSaviTest(
        alternative=Hypothesis.P0LessThanP1, alpha=ALPHA_HARDWARE
    )
    savi_simulation = MirroredSaviTest(
        alternative=Hypothesis.P0LessThanP1, alpha=ALPHA_SIMULATION
    )

    # Load Lai tests
    # The original script kept these values in commented-out ``set_c`` calls
    # (presumably earlier calibration results -- confirm before relying on them):
    #   Lai-200:     0.00014741399676752065
    #   Lai-500:     5.349419043278717e-05
    #   Lai-500 SIM: 1.184327928758278e-05
    lai_hardware_50 = _calibrated_lai("Lai-50", 50, ALPHA_HARDWARE)
    lai_hardware_200 = _calibrated_lai("Lai-200", 200, ALPHA_HARDWARE)
    lai_hardware_500 = _calibrated_lai("Lai-500", 500, ALPHA_HARDWARE)
    lai_simulation_500 = _calibrated_lai("Lai-500 SIM", 500, ALPHA_SIMULATION)

    # Load STEP tests
    step_hardware_50 = _step(50, ALPHA_HARDWARE)
    step_hardware_200 = _step(200, ALPHA_HARDWARE)
    step_hardware_500 = _step(500, ALPHA_HARDWARE)
    step_simulation_500 = _step(500, ALPHA_SIMULATION)

    # Run without index permutation
    permutation_idx_hardware = np.arange(50)
    permutation_idx_simulation = np.arange(500)
    every_row = slice(None)  # equivalent to indexing with ``[:, k]``

    # Suites list the tests in the order in which they are run; the
    # ``*_order`` tuples give the (different) order in which they are printed.
    carrot_suite = [
        ("Lai-200", lai_hardware_200),
        ("STEP-200", step_hardware_200),
        (SAVI_LABEL, savi_hardware),
    ]
    carrot_order = ("Lai-200", "STEP-200", SAVI_LABEL)

    hardware_suite = [
        ("Lai-50", lai_hardware_50),
        ("Lai-200", lai_hardware_200),
        ("Lai-500", lai_hardware_500),
        (SAVI_LABEL, savi_hardware),
        ("STEP-50", step_hardware_50),
        ("STEP-200", step_hardware_200),
        ("STEP-500", step_hardware_500),
    ]
    hardware_order = (
        "Lai-50",
        "Lai-200",
        "Lai-500",
        "STEP-50",
        "STEP-200",
        "STEP-500",
        SAVI_LABEL,
    )

    simulation_suite = [
        ("Lai-500", lai_simulation_500),
        (SAVI_LABEL, savi_simulation),
        ("STEP-500", step_simulation_500),
    ]
    simulation_order = ("Lai-500", "STEP-500", SAVI_LABEL)

    # Run appropriate tests on each data stream

    ####################################
    ### Result 0: Princeton Hardware ###
    ####################################
    # Both streams are run before anything is printed, as in the original.
    carrot_distribution_results = _run_suite(
        carrot_suite, data["carrot_distribution"], every_row
    )
    carrot_policy_results = _run_suite(
        carrot_suite, data["carrot_policy"], every_row
    )
    _print_report(
        "CARROT ON PLATE (Distribution Shift): ",
        carrot_distribution_results,
        carrot_order,
    )
    _print_report(
        "CARROT ON PLATE Supplement E (Policy Shift): ",
        carrot_policy_results,
        carrot_order,
    )

    ##########################################################
    ### Results 1-2: FoldRedTowel and CleanUpSpill (50 rows) ###
    ##########################################################
    hardware_tasks = (
        ("FOLD RED TOWEL: ", "fold_red_towel"),
        ("CLEAN UP SPILL: ", "clean_up_spill"),
    )
    for title, key in hardware_tasks:
        results = _run_suite(hardware_suite, data[key], permutation_idx_hardware)
        _print_report(title, results, hardware_order)

    ###########################################################################
    ### Results 3-5: SpoonOnTowel, EggplantInBasket and StackCube (500 rows) ###
    ###########################################################################
    simulation_tasks = (
        ("SPOON ON TOWEL: ", "sim_spoon_on_towel"),
        ("EGGPLANT IN BASKET: ", "sim_eggplant_in_basket"),
        ("STACK CUBE: ", "sim_stack_cube"),
    )
    for title, key in simulation_tasks:
        results = _run_suite(simulation_suite, data[key], permutation_idx_simulation)
        _print_report(title, results, simulation_order)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)