
Commit cdfa143

add beartype
1 parent 60995d1 commit cdfa143

4 files changed: 16 additions, 11 deletions

MEGABYTE_pytorch/megabyte.py

Lines changed: 8 additions & 3 deletions

```diff
@@ -9,6 +9,9 @@
 from einops import rearrange, reduce, repeat, pack, unpack
 from einops.layers.torch import Rearrange
 
+from beartype import beartype
+from beartype.typing import Tuple, Union
+
 from MEGABYTE_pytorch.attend import Attend
 
 from tqdm import tqdm
@@ -200,13 +203,15 @@ def forward(self, x):
 # main class
 
 class MEGABYTE(nn.Module):
+
+    @beartype
     def __init__(
         self,
         *,
         num_tokens,
-        dim,
-        depth: tuple,
-        max_seq_len: tuple,
+        dim: Union[Tuple, int],
+        depth: Tuple,
+        max_seq_len: Tuple,
         dim_head = 64,
         heads = 8,
         attn_dropout = 0.,
```

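With `@beartype` on the constructor, the tuple arguments are now validated at call time: passing, say, a list for `depth` raises a beartype exception before the model is built, rather than failing somewhere inside `__init__`. A minimal sketch of that behavior, assuming the usual `from MEGABYTE_pytorch import MEGABYTE` entry point; the sizes below are illustrative, and `BeartypeException` is beartype's base exception class:

```python
from beartype.roar import BeartypeException
from MEGABYTE_pytorch import MEGABYTE

# accepted: depth and max_seq_len are Tuples; dim may be a Tuple or a plain int
model = MEGABYTE(
    num_tokens = 256,
    dim = (512, 512),
    depth = (6, 2),
    max_seq_len = (1024, 4)
)

# rejected at call time: a list does not satisfy the Tuple hint on depth
try:
    MEGABYTE(num_tokens = 256, dim = (512, 512), depth = [6, 2], max_seq_len = (1024, 4))
except BeartypeException as err:
    print(f'beartype rejected the call: {err}')
```
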
README.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -49,7 +49,7 @@ sampled = model.generate(temperature = 0.9, filter_thres = 0.9) # (1, 1024, 4)
 
 ## Test
 
-Train on character-level enwik8 with patches of size 4 - length 4096
+Train on character-level enwik8 with patches of size 4 - length 8192
 
 ```bash
 $ python train.py
````

setup.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -3,7 +3,7 @@
 setup(
   name = 'MEGABYTE-pytorch',
   packages = find_packages(),
-  version = '0.0.9',
+  version = '0.0.10',
   license='MIT',
   description = 'MEGABYTE - Pytorch',
   long_description_content_type = 'text/markdown',
@@ -16,6 +16,7 @@
     'transformers'
   ],
   install_requires=[
+    'beartype',
     'einops>=0.6.1',
     'torch>=1.10',
     'tqdm'
```

train.py

Lines changed: 5 additions & 6 deletions

```diff
@@ -17,9 +17,8 @@
 LEARNING_RATE = 2e-4
 VALIDATE_EVERY = 100
 GENERATE_EVERY = 500
-GENERATE_LENGTH = 1024
 PRIME_LEN = 100
-SEQ_LEN = 1024
+SEQ_LEN = 8192
 
 # helpers
 
@@ -38,9 +37,9 @@ def decode_tokens(tokens):
 
 model = MEGABYTE(
     num_tokens = 256,
-    dim = (512, 512),
-    depth = (6, 2),
-    max_seq_len = (1024, 4),
+    dim = (768, 512, 256),
+    depth = (6, 4, 2),
+    max_seq_len = (512, 4, 4),
     flash_attn = True
 ).cuda()
 
@@ -94,7 +93,7 @@ def __len__(self):
             loss = model(next(val_loader), return_loss = True)
            print(f'validation loss: {loss.item()}')
 
-    if i % GENERATE_EVERY == 0:
+    if i != 0 and i % GENERATE_EVERY == 0:
        model.eval()
        inp = random.choice(val_dataset)[:-1]
        prime_inp = inp[:PRIME_LEN]
```

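The new training configuration is a three-stage hierarchy, and SEQ_LEN rises to match: the total context the model covers is the product of the per-stage `max_seq_len` entries (512 × 4 × 4 = 8192), which is also why the README line above now says length 8192. The `i != 0` guard simply skips generation on the very first step, where `0 % GENERATE_EVERY == 0` would otherwise sample from an untrained model. A small sanity check of the arithmetic, with names mirroring train.py:

```python
import math

# three-stage hierarchy from the updated train.py
dim         = (768, 512, 256)
depth       = (6, 4, 2)
max_seq_len = (512, 4, 4)

# one dim and depth per stage
assert len(dim) == len(depth) == len(max_seq_len)

# total sequence length is the product of the per-stage lengths
SEQ_LEN = math.prod(max_seq_len)
assert SEQ_LEN == 8192   # matches SEQ_LEN = 8192 and the README's "length 8192"
```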