update comment of callbacks

huangshiyu13 · huangshiyu13 · commit 2dc2bae55155 · 2023-06-21T19:51:21.000+08:00
diff --git a/examples/cartpole/README.md b/examples/cartpole/README.md
@@ -13,7 +13,7 @@ To train with [Dual-clip PPO](https://arxiv.org/abs/1912.09729):
 python train_ppo.py --config dual_clip_ppo.yaml
 ```
 
-If you want to save checkpoints, try to train with Callbacks:
+To evaluate the agent during training, save the best model, and save checkpoints, train with callbacks:
 
 ```shell
 python train_ppo.py --config callbacks.yaml
diff --git a/examples/cartpole/callbacks.yaml b/examples/cartpole/callbacks.yaml
@@ -1,19 +1,19 @@
 callbacks:
   - id: "CheckpointCallback"
     args: {
-        "save_freq": 500,
-        "save_path": "./results/checkpoints/",
-        "name_prefix": "ppo",
-        "save_replay_buffer": True
+        "save_freq": 500, # how often to save the model
+        "save_path": "./results/checkpoints/",  # where to save the model
+        "name_prefix": "ppo", # the prefix of the saved model
+        "save_replay_buffer": True # does not work yet
     }
   - id: "EvalCallback"
     args: {
-      "eval_env": {"id": "CartPole-v1","env_num":4},
-      "n_eval_episodes": 4,
-      "eval_freq":500,
-      "log_path": "./results/eval_log_path",
-      "best_model_save_path": "./results/best_model/",
-      "deterministic": True,
-      "render": True,
-      "asynchronous": True,
+      "eval_env": {"id": "CartPole-v1","env_num":4}, # the evaluation environment: its id and how many envs to set up
+      "n_eval_episodes": 4, # how many episodes to run for each evaluation
+      "eval_freq": 500, # how often to run evaluation
+      "log_path": "./results/eval_log_path", # where to save the evaluation results
+      "best_model_save_path": "./results/best_model/", # where to save the best model
+      "deterministic": True, # whether to use deterministic actions
+      "render": True, # whether to render the env
+      "asynchronous": True, # whether to run evaluation asynchronously
     }