@@ -271,8 +271,8 @@ def __init__(

             if exists(next_h_dim) and next_h_dim != dim:
                 proj = nn.Sequential(
-                    nn.Linear(h_dim, next_h_dim * next_seq_len),
-                    Rearrange('... (n d) -> (...) n d', n = next_seq_len)
+                    nn.Linear(h_dim, next_h_dim * (next_seq_len + 1)),
+                    Rearrange('... (n d) -> (...) n d', n = next_seq_len + 1)
                 )

             self.to_next_transformer_projections.append(proj)
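The first hunk widens the per-stage projection so each pooled token expands into next_seq_len + 1 tokens rather than next_seq_len. A minimal, self-contained sketch of the new shape arithmetic (the sizes below are hypothetical, chosen only for illustration):

import torch
from torch import nn
from einops.layers.torch import Rearrange

h_dim, next_h_dim, next_seq_len = 512, 256, 4

# updated projection: one vector of width h_dim becomes
# next_seq_len + 1 tokens of width next_h_dim
proj = nn.Sequential(
    nn.Linear(h_dim, next_h_dim * (next_seq_len + 1)),
    Rearrange('... (n d) -> (...) n d', n = next_seq_len + 1)
)

pooled = torch.randn(2, 8, h_dim)   # (batch, compressed length, h_dim)
out = proj(pooled)
print(out.shape)                    # torch.Size([16, 5, 256]) = (batch * length, next_seq_len + 1, next_h_dim)

The extra (+ 1) position is what the later hunks slice away again, once on each side.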
@@ -379,10 +379,6 @@ def forward(self, ids, return_loss = False):
                 stage_tokens,
             ), dim = -2)

-            # omit last token
-
-            stage_tokens = stage_tokens[:, :-1]
-
             # sum the previous hierarchy's representation

             if exists(prev_stage_tokens_repr):
@@ -394,11 +390,13 @@ def forward(self, ids, return_loss = False):

             # project for next stage in the hierarchy

-            prev_stage_tokens_repr = proj(attended)
+            prev_stage_tokens_repr = proj(attended[..., :-1, :])

             # project to logits

-            logits = self.to_logits(attended)
+            logits = self.to_logits(attended)
+
+            logits = logits[..., 1:, :]

             if not return_loss:

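Read together, the hunks move the causal shift from the stage inputs to the stage outputs: the stage now attends over all next_seq_len + 1 projected tokens (the old early stage_tokens[:, :-1] trim is removed), then the last position is dropped before projecting down the hierarchy and the first position is dropped from the logits. A shape-only sketch of that bookkeeping, with made-up sizes (reading the extra position as a shifted start token is an assumption, not stated in the diff):

import torch
from torch import nn

batch, seq_len, dim, num_tokens = 2, 4, 256, 1000

# stage output over seq_len + 1 positions
attended = torch.randn(batch, seq_len + 1, dim)

# drop the final position before handing the representation down the hierarchy
prev_stage_tokens_repr = attended[..., :-1, :]   # (batch, seq_len, dim)

# drop the first position from the logits (presumably so prediction i
# lines up with target token i)
to_logits = nn.Linear(dim, num_tokens)
logits = to_logits(attended)[..., 1:, :]         # (batch, seq_len, num_tokens)

Both slices leave tensors of length seq_len, so the next stage's input and the loss targets stay aligned with the original token positions.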