
Commit 0b71fc4

feature(nyz): add H-PPO hybrid action space algorithm (#140)
* feature(nyz): add hybrid ppo, unify action_space field and use dict type mu sigma
* polish(nyz): polish ppo config continous field, move to action_space field
* fix(nyz): fix ppo action_space field compatibility bug
* fix(nyz): fix ppg/sac/cql action_space field compatibility bug
* demo(nyz): update gym hybrid hppo config
* polish(pu): polish hppo hyper-para, use tanh and fixed sigma 0.3 in actor_action_args, use clamp [0,1] and [-1,1] for acceleration_value and rotation_value correspondingly after sample from the pi distri. in collect phase
* polish(pu): polish as review
* polish(pu): polish hppo config
* polish(pu): entropy weight=0.03 performs best empirically
* fix(nyz): fix unittest compatibility bugs
* polish(nyz): remove atari env unused print(ci skip)

Co-authored-by: puyuan1996 <[email protected]>
1 parent eb6c60c commit 0b71fc4
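
The collect-phase sampling scheme described in the commit message (tanh-squashed mean, fixed sigma 0.3 for the continuous action_args, then per-dimension clamping of acceleration_value to [0, 1] and rotation_value to [-1, 1]) can be sketched as follows. This is a minimal, self-contained illustration rather than the actual DI-engine collector code; tensor names and shapes are assumptions.

# Minimal sketch (not DI-engine code) of the H-PPO collect-phase sampling the commit
# message describes: tanh-squashed mean, fixed sigma 0.3, per-dimension clamping.
# `mu_raw`, `acceleration_value` and `rotation_value` are illustrative names.
import torch
from torch.distributions import Normal


def sample_action_args(mu_raw: torch.Tensor, sigma: float = 0.3) -> torch.Tensor:
    """mu_raw: (B, 2) actor output for [acceleration_value, rotation_value]."""
    mu = torch.tanh(mu_raw)  # squash the mean into [-1, 1]
    dist = Normal(mu, torch.full_like(mu, sigma))  # fixed sigma, as in the commit
    action_args = dist.sample()
    acceleration_value = action_args[:, 0].clamp(0, 1)  # clamp to [0, 1]
    rotation_value = action_args[:, 1].clamp(-1, 1)  # clamp to [-1, 1]
    return torch.stack([acceleration_value, rotation_value], dim=-1)


print(sample_action_args(torch.randn(4, 2)).shape)  # torch.Size([4, 2])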

File tree

65 files changed, +480 -571 lines changed


ding/model/template/mavac.py

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@ def __init__(
             actor_head_layer_num: int = 2,
             critic_head_hidden_size: int = 64,
             critic_head_layer_num: int = 1,
+            action_space: str = 'discrete',
             activation: Optional[nn.Module] = nn.ReLU(),
             norm_type: Optional[str] = None,
     ) -> None:

ding/model/template/ppg.py

Lines changed: 2 additions & 2 deletions
@@ -14,8 +14,8 @@ def __init__(
             self,
             obs_shape: Union[int, SequenceType],
             action_shape: Union[int, SequenceType],
+            action_space: str = 'discrete',
             share_encoder: bool = True,
-            continuous: bool = False,
             encoder_hidden_size_list: SequenceType = [128, 128, 64],
             actor_head_hidden_size: int = 64,
             actor_head_layer_num: int = 2,
@@ -26,7 +26,7 @@ def __init__(
     ) -> None:
         super(PPG, self).__init__()
         self.actor_critic = VAC(
-            obs_shape, action_shape, share_encoder, continuous, encoder_hidden_size_list, actor_head_hidden_size,
+            obs_shape, action_shape, action_space, share_encoder, encoder_hidden_size_list, actor_head_hidden_size,
             actor_head_layer_num, critic_head_hidden_size, critic_head_layer_num, activation, norm_type
         )
         self.aux_critic = copy.deepcopy(self.actor_critic.critic)

ding/model/template/qac.py

Lines changed: 14 additions & 14 deletions
@@ -23,7 +23,7 @@ def __init__(
             self,
             obs_shape: Union[int, SequenceType],
             action_shape: Union[int, SequenceType, EasyDict],
-            actor_head_type: str,
+            action_space: str,
             twin_critic: bool = False,
             actor_head_hidden_size: int = 64,
             actor_head_layer_num: int = 1,
@@ -39,7 +39,7 @@ def __init__(
             - obs_shape (:obj:`Union[int, SequenceType]`): Observation's space.
             - action_shape (:obj:`Union[int, SequenceType, EasyDict]`): Action's space, such as 4, (3, ), \
                 EasyDict({'action_type_shape': 3, 'action_args_shape': 4}).
-            - actor_head_type (:obj:`str`): Whether choose ``regression`` or ``reparameterization`` or ``hybrid`` .
+            - action_space (:obj:`str`): Whether choose ``regression`` or ``reparameterization`` or ``hybrid`` .
             - twin_critic (:obj:`bool`): Whether include twin critic.
             - actor_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to actor-nn's ``Head``.
             - actor_head_layer_num (:obj:`int`): The num of layers used in the network to compute Q value output \
@@ -56,9 +56,9 @@ def __init__(
         obs_shape: int = squeeze(obs_shape)
         action_shape = squeeze(action_shape)
         self.action_shape = action_shape
-        self.actor_head_type = actor_head_type
-        assert self.actor_head_type in ['regression', 'reparameterization', 'hybrid']
-        if self.actor_head_type == 'regression':  # DDPG, TD3
+        self.action_space = action_space
+        assert self.action_space in ['regression', 'reparameterization', 'hybrid']
+        if self.action_space == 'regression':  # DDPG, TD3
             self.actor = nn.Sequential(
                 nn.Linear(obs_shape, actor_head_hidden_size), activation,
                 RegressionHead(
@@ -70,7 +70,7 @@ def __init__(
                     norm_type=norm_type
                 )
             )
-        elif self.actor_head_type == 'reparameterization':  # SAC
+        elif self.action_space == 'reparameterization':  # SAC
             self.actor = nn.Sequential(
                 nn.Linear(obs_shape, actor_head_hidden_size), activation,
                 ReparameterizationHead(
@@ -82,7 +82,7 @@ def __init__(
                     norm_type=norm_type
                 )
             )
-        elif self.actor_head_type == 'hybrid':  # PADDPG
+        elif self.action_space == 'hybrid':  # PADDPG
             # hybrid action space: action_type(discrete) + action_args(continuous),
             # such as {'action_type_shape': torch.LongTensor([0]), 'action_args_shape': torch.FloatTensor([0.1, -0.27])}
             action_shape.action_args_shape = squeeze(action_shape.action_args_shape)
@@ -110,7 +110,7 @@ def __init__(
             )
             self.actor = nn.ModuleList([actor_action_type, actor_action_args])
         self.twin_critic = twin_critic
-        if self.actor_head_type == 'hybrid':
+        if self.action_space == 'hybrid':
             critic_input_size = obs_shape + action_shape.action_type_shape + action_shape.action_args_shape
         else:
             critic_input_size = obs_shape + action_shape
@@ -194,7 +194,7 @@ def forward(self, inputs: Union[torch.Tensor, Dict], mode: str) -> Dict:
 
         Critic Examples:
             >>> inputs = {'obs': torch.randn(4,N), 'action': torch.randn(4,1)}
-            >>> model = QAC(obs_shape=(N, ),action_shape=1,actor_head_type='regression')
+            >>> model = QAC(obs_shape=(N, ),action_shape=1,action_space='regression')
             >>> model(inputs, mode='compute_critic')['q_value'] # q value
             tensor([0.0773, 0.1639, 0.0917, 0.0370], grad_fn=<SqueezeBackward1>)
 
@@ -245,13 +245,13 @@ def compute_actor(self, inputs: torch.Tensor) -> Dict:
             >>> actor_outputs['logit'][1].shape # sigma
             >>> torch.Size([4, 64])
         """
-        if self.actor_head_type == 'regression':
+        if self.action_space == 'regression':
             x = self.actor(inputs)
             return {'action': x['pred']}
-        elif self.actor_head_type == 'reparameterization':
+        elif self.action_space == 'reparameterization':
             x = self.actor(inputs)
             return {'logit': [x['mu'], x['sigma']]}
-        elif self.actor_head_type == 'hybrid':
+        elif self.action_space == 'hybrid':
             logit = self.actor[0](inputs)
             action_args = self.actor[1](inputs)
             return {'logit': logit['logit'], 'action_args': action_args['pred']}
@@ -284,14 +284,14 @@ def compute_critic(self, inputs: Dict) -> Dict:
 
         Examples:
             >>> inputs = {'obs': torch.randn(4, N), 'action': torch.randn(4, 1)}
-            >>> model = QAC(obs_shape=(N, ),action_shape=1,actor_head_type='regression')
+            >>> model = QAC(obs_shape=(N, ),action_shape=1,action_space='regression')
             >>> model(inputs, mode='compute_critic')['q_value'] # q value
             >>> tensor([0.0773, 0.1639, 0.0917, 0.0370], grad_fn=<SqueezeBackward1>)
         """
 
         obs, action = inputs['obs'], inputs['action']
         assert len(obs.shape) == 2
-        if self.actor_head_type == 'hybrid':
+        if self.action_space == 'hybrid':
             action_type_logit = inputs['logit']
             action_type_logit = torch.softmax(action_type_logit, dim=-1)
             action_args = action['action_args']
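
A hedged usage sketch of the renamed interface, following the docstring examples in this diff: QAC now takes action_space ('regression', 'reparameterization' or 'hybrid') in place of actor_head_type, and the hybrid branch of compute_actor returns discrete logits plus continuous action_args. The import path is taken from this commit's file tree; the shapes are illustrative assumptions.

# Illustrative sketch, not part of the commit: construct a hybrid-action QAC and run
# the actor forward pass shown in the diff above. Shapes are assumptions.
import torch
from easydict import EasyDict
from ding.model.template.qac import QAC  # module path as in this commit's file tree

N = 32  # assumed observation dim
model = QAC(
    obs_shape=(N, ),
    action_shape=EasyDict({'action_type_shape': 3, 'action_args_shape': 6}),
    action_space='hybrid',
)
out = model(torch.randn(4, N), mode='compute_actor')
# hybrid compute_actor returns discrete logits and continuous args
print(out['logit'].shape, out['action_args'].shape)  # expected (4, 3) and (4, 6)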

ding/model/template/qac_dist.py

Lines changed: 10 additions & 10 deletions
@@ -20,7 +20,7 @@ def __init__(
             self,
             obs_shape: Union[int, SequenceType],
             action_shape: Union[int, SequenceType],
-            actor_head_type: str = "regression",
+            action_space: str = "regression",
             critic_head_type: str = "categorical",
             actor_head_hidden_size: int = 64,
             actor_head_layer_num: int = 1,
@@ -38,7 +38,7 @@ def __init__(
         Arguments:
             - obs_shape (:obj:`Union[int, SequenceType]`): Observation's space.
             - action_shape (:obj:`Union[int, SequenceType]`): Action's space.
-            - actor_head_type (:obj:`str`): Whether choose ``regression`` or ``reparameterization``.
+            - action_space (:obj:`str`): Whether choose ``regression`` or ``reparameterization``.
             - critic_head_type (:obj:`str`): Only ``categorical``.
             - actor_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to actor-nn's ``Head``.
             - actor_head_layer_num (:obj:`int`):
@@ -58,9 +58,9 @@ def __init__(
         super(QACDIST, self).__init__()
         obs_shape: int = squeeze(obs_shape)
         action_shape: int = squeeze(action_shape)
-        self.actor_head_type = actor_head_type
-        assert self.actor_head_type in ['regression', 'reparameterization']
-        if self.actor_head_type == 'regression':
+        self.action_space = action_space
+        assert self.action_space in ['regression', 'reparameterization']
+        if self.action_space == 'regression':
             self.actor = nn.Sequential(
                 nn.Linear(obs_shape, actor_head_hidden_size), activation,
                 RegressionHead(
@@ -72,7 +72,7 @@ def __init__(
                     norm_type=norm_type
                 )
             )
-        elif self.actor_head_type == 'reparameterization':
+        elif self.action_space == 'reparameterization':
            self.actor = nn.Sequential(
                 nn.Linear(obs_shape, actor_head_hidden_size), activation,
                 ReparameterizationHead(
@@ -156,7 +156,7 @@ def forward(self, inputs: Union[torch.Tensor, Dict], mode: str) -> Dict:
         Critic Examples:
             >>> # Categorical mode
             >>> inputs = {'obs': torch.randn(4,N), 'action': torch.randn(4,1)}
-            >>> model = QACDIST(obs_shape=(N, ),action_shape=1,actor_head_type='regression', \
+            >>> model = QACDIST(obs_shape=(N, ),action_shape=1,action_space='regression', \
             ... critic_head_type='categorical', n_atoms=51)
             >>> q_value = model(inputs, mode='compute_critic') # q value
             >>> assert q_value['q_value'].shape == torch.Size([4, 1])
@@ -204,9 +204,9 @@ def compute_actor(self, inputs: torch.Tensor) -> Dict:
             >>> torch.Size([4, 64])
         """
         x = self.actor(inputs)
-        if self.actor_head_type == 'regression':
+        if self.action_space == 'regression':
             return {'action': x['pred']}
-        elif self.actor_head_type == 'reparameterization':
+        elif self.action_space == 'reparameterization':
             return {'logit': [x['mu'], x['sigma']]}
 
     def compute_critic(self, inputs: Dict) -> Dict:
@@ -232,7 +232,7 @@ def compute_critic(self, inputs: Dict) -> Dict:
         Examples:
             >>> # Categorical mode
             >>> inputs = {'obs': torch.randn(4,N), 'action': torch.randn(4,1)}
-            >>> model = QACDIST(obs_shape=(N, ),action_shape=1,actor_head_type='regression', \
+            >>> model = QACDIST(obs_shape=(N, ),action_shape=1,action_space='regression', \
             ... critic_head_type='categorical', n_atoms=51)
             >>> q_value = model(inputs, mode='compute_critic') # q value
             >>> assert q_value['q_value'].shape == torch.Size([4, 1])

ding/model/template/tests/test_hybrid_qac.py

Lines changed: 4 additions & 4 deletions
@@ -16,7 +16,7 @@
         'action_args_shape': (6, )
     }),
     'twin': True,
-    'actor_head_type': 'hybrid'
+    'action_space': 'hybrid'
 }
 
 
@@ -27,10 +27,10 @@ def test_hybrid_qac(
         self,
         action_shape=hybrid_args['action_shape'],
         twin=hybrid_args['twin'],
-        actor_head_type=hybrid_args['actor_head_type']
+        action_space=hybrid_args['action_space']
     ):
         N = 32
-        assert actor_head_type == 'hybrid'
+        assert action_space == 'hybrid'
         inputs = {
             'obs': torch.randn(B, N),
             'action': {
@@ -42,7 +42,7 @@ def test_hybrid_qac(
         model = QAC(
             obs_shape=(N, ),
             action_shape=action_shape,
-            actor_head_type=actor_head_type,
+            action_space=action_space,
             critic_head_hidden_size=embedding_size,
             actor_head_hidden_size=embedding_size,
             twin_critic=twin,

ding/model/template/tests/test_qac.py

Lines changed: 5 additions & 5 deletions
@@ -17,16 +17,16 @@
 
 
 @pytest.mark.unittest
-@pytest.mark.parametrize('action_shape, twin, actor_head_type', args)
+@pytest.mark.parametrize('action_shape, twin, action_space', args)
 class TestQAC:
 
-    def test_fcqac(self, action_shape, twin, actor_head_type):
+    def test_fcqac(self, action_shape, twin, action_space):
         N = 32
         inputs = {'obs': torch.randn(B, N), 'action': torch.randn(B, squeeze(action_shape))}
         model = QAC(
             obs_shape=(N, ),
             action_shape=action_shape,
-            actor_head_type=actor_head_type,
+            action_space=action_space,
             critic_head_hidden_size=embedding_size,
             actor_head_hidden_size=embedding_size,
             twin_critic=twin,
@@ -41,15 +41,15 @@ def test_fcqac(self, action_shape, twin, actor_head_type):
 
         # compute_action
         print(model)
-        if actor_head_type == 'regression':
+        if action_space == 'regression':
             action = model(inputs['obs'], mode='compute_actor')['action']
             if squeeze(action_shape) == 1:
                 assert action.shape == (B, )
             else:
                 assert action.shape == (B, squeeze(action_shape))
             assert action.eq(action.clamp(-1, 1)).all()
             is_differentiable(action.sum(), model.actor)
-        elif actor_head_type == 'reparameterization':
+        elif action_space == 'reparameterization':
             (mu, sigma) = model(inputs['obs'], mode='compute_actor')['logit']
             assert mu.shape == (B, *action_shape)
             assert sigma.shape == (B, *action_shape)

ding/model/template/tests/test_qac_dist.py

Lines changed: 5 additions & 5 deletions
@@ -17,16 +17,16 @@
 
 
 @pytest.mark.unittest
-@pytest.mark.parametrize('action_shape, actor_head_type', args)
+@pytest.mark.parametrize('action_shape, action_space', args)
 class TestQACDIST:
 
-    def test_fcqac_dist(self, action_shape, actor_head_type):
+    def test_fcqac_dist(self, action_shape, action_space):
         N = 32
         inputs = {'obs': torch.randn(B, N), 'action': torch.randn(B, squeeze(action_shape))}
         model = QACDIST(
             obs_shape=(N, ),
             action_shape=action_shape,
-            actor_head_type=actor_head_type,
+            action_space=action_space,
             critic_head_hidden_size=embedding_size,
             actor_head_hidden_size=embedding_size,
         )
@@ -43,15 +43,15 @@ def test_fcqac_dist(self, action_shape, actor_head_type):
 
         # compute_action
         print(model)
-        if actor_head_type == 'regression':
+        if action_space == 'regression':
             action = model(inputs['obs'], mode='compute_actor')['action']
             if squeeze(action_shape) == 1:
                 assert action.shape == (B, )
             else:
                 assert action.shape == (B, squeeze(action_shape))
             assert action.eq(action.clamp(-1, 1)).all()
             is_differentiable(action.sum(), model.actor)
-        elif actor_head_type == 'reparameterization':
+        elif action_space == 'reparameterization':
             (mu, sigma) = model(inputs['obs'], mode='compute_actor')['logit']
             assert mu.shape == (B, *action_shape)
             assert sigma.shape == (B, *action_shape)

ding/model/template/tests/test_vac.py

Lines changed: 7 additions & 7 deletions
@@ -8,8 +8,8 @@
 
 B, C, H, W = 4, 3, 128, 128
 obs_shape = [4, (8, ), (4, 64, 64)]
-act_args = [[6, False], [(3, ), True], [[2, 3, 6], False]]
-#act_args = [[(3, ), True]]
+act_args = [[6, 'discrete'], [(3, ), 'continuous'], [[2, 3, 6], 'discrete']]
+# act_args = [[(3, ), True]]
 args = list(product(*[obs_shape, act_args, [False, True]]))
 
 
@@ -29,12 +29,12 @@ def test_vac(self, obs_shape, act_args, share_encoder):
             inputs = torch.randn(B, obs_shape)
         else:
             inputs = torch.randn(B, *obs_shape)
-        model = VAC(obs_shape, action_shape=act_args[0], continuous=act_args[1], share_encoder=share_encoder)
+        model = VAC(obs_shape, action_shape=act_args[0], action_space=act_args[1], share_encoder=share_encoder)
 
         outputs = model(inputs, mode='compute_actor_critic')
         value, logit = outputs['value'], outputs['logit']
-        if model.continuous:
-            outputs = value.sum() + logit[0].sum() + logit[1].sum()
+        if model.action_space == 'continuous':
+            outputs = value.sum() + logit['mu'].sum() + logit['sigma'].sum()
         else:
             if model.multi_head:
                 outputs = value.sum() + sum([t.sum() for t in logit])
@@ -45,8 +45,8 @@ def test_vac(self, obs_shape, act_args, share_encoder):
         for p in model.parameters():
             p.grad = None
         logit = model(inputs, mode='compute_actor')['logit']
-        if model.continuous:
-            logit = logit[0].sum() + logit[1].sum()
+        if model.action_space == 'continuous':
+            logit = logit['mu'].sum() + logit['sigma'].sum()
         self.output_check(model.actor, logit, model.action_shape)
 
         for p in model.parameters():
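
The change above reflects the new continuous-mode VAC interface: the constructor takes action_space='continuous' in place of continuous=True, and the actor logit is now a dict with 'mu' and 'sigma' keys rather than a [mu, sigma] list. A minimal sketch, assuming the ding.model.template.vac module path and a flat observation shape:

# Illustrative sketch, not part of the commit: continuous-mode VAC after this change.
import torch
from ding.model.template.vac import VAC  # module path assumed from the repo layout

model = VAC(obs_shape=(8, ), action_shape=(3, ), action_space='continuous')
logit = model(torch.randn(4, 8), mode='compute_actor')['logit']
print(logit['mu'].shape, logit['sigma'].shape)  # expected torch.Size([4, 3]) for both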
