
Commit c55cd60

[Bugfix] Python package do not have __version__ (#104)
Also fixes some formatting issues in Python docstrings.
1 parent 0bedda7 commit c55cd60

File tree

python/flashinfer/__init__.py
python/flashinfer/cascade.py
python/flashinfer/decode.py
python/flashinfer/page.py
python/flashinfer/prefill.py

5 files changed: +25 -19 lines changed


python/flashinfer/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -34,3 +34,9 @@
     BatchPrefillWithSharedPrefixPagedKVCacheWrapper,
 )
 from .page import append_paged_kv_cache
+
+try:
+    from ._build_meta import __version__ as __version__
+except ImportError:
+    with open("version.txt") as f:
+        __version__ = f.read().strip()
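
With this fix, `flashinfer.__version__` is resolved at import time: a generated `_build_meta` module wins if present, and `version.txt` is the fallback for source checkouts. The generation step is not part of this diff; the following is a minimal sketch, assuming a hypothetical build script writes the module:

# Hypothetical build-time helper (not in this commit): writes the resolved
# version into python/flashinfer/_build_meta.py so installed packages do not
# rely on version.txt being shipped alongside the package.
from pathlib import Path

def write_build_meta(package_dir: str, version: str) -> None:
    meta_path = Path(package_dir) / "_build_meta.py"
    meta_path.write_text(f'__version__ = "{version}"\n')

if __name__ == "__main__":
    version = Path("version.txt").read_text().strip()
    write_build_meta("python/flashinfer", version)

Either way, `import flashinfer; print(flashinfer.__version__)` now works instead of raising AttributeError.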

python/flashinfer/cascade.py

Lines changed: 8 additions & 8 deletions
(In this file and the ones below, each paired - / + line looks identical in plain text; the changes appear to be whitespace-only docstring cleanups, such as stripped trailing spaces.)

@@ -117,7 +117,7 @@ def merge_state_in_place(
     s_other : torch.Tensor
         The other logsumexp value to be merged, expected to be a float32 tensor,
         shape: ``(seq_len, num_heads)``.
-
+
     Example
     -------
     >>> import torch
@@ -135,7 +135,7 @@ def merge_state_in_place(


 def merge_states(v: torch.Tensor, s: torch.Tensor):
-    r"""Merge multiple attention states (v, s).
+    r"""Merge multiple attention states (v, s).

     Parameters
     ----------
@@ -154,7 +154,7 @@ def merge_states(v: torch.Tensor, s: torch.Tensor):
     S : torch.Tensor
         The logsumexp value from the merged KV-segments, shape:
         ``[seq_len, num_heads]``.
-
+
     Example
     -------
     >>> import torch
@@ -229,7 +229,7 @@ def batch_decode_with_shared_prefix_padded_kv_cache(
     -------
     V : torch.Tensor
         The attention output, shape: ``[batch_size, num_heads, head_dim]``
-
+
     Example
     -------
     >>> import torch
@@ -312,7 +312,7 @@ class BatchDecodeWithSharedPrefixPagedKVCacheWrapper:
     ... )
     >>> batch_size = 7
     >>> shared_prefix_len = 8192
-    >>> unique_kv_page_indices = torch.arange(max_num_pages).int().to("cuda:0")
+    >>> unique_kv_page_indices = torch.arange(max_num_pages).int().to("cuda:0")
     >>> unique_kv_page_indptr = torch.tensor(
     ...     [0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32, device="cuda:0"
     ... )
@@ -355,7 +355,7 @@ class BatchDecodeWithSharedPrefixPagedKVCacheWrapper:
     ...     # compute batch decode attention, reuse auxiliary data structures for all layers
     ...     o = wrapper.forward(q, k_shared, v_shared, unique_kv_data)
     ...     outputs.append(o)
-    ...
+    ...
     >>> # clear auxiliary data structures
     >>> wrapper.end_forward()
     >>> outputs[0].shape
@@ -547,7 +547,7 @@ class BatchPrefillWithSharedPrefixPagedKVCacheWrapper:
     >>> qo_indptr = torch.tensor(
     ...     [0, 33, 44, 55, 66, 77, 88, nnz_qo], dtype=torch.int32, device="cuda:0"
     ... )
-    >>> paged_kv_indices = torch.arange(max_num_pages).int().to("cuda:0")
+    >>> paged_kv_indices = torch.arange(max_num_pages).int().to("cuda:0")
     >>> paged_kv_indptr = torch.tensor(
     ...     [0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32, device="cuda:0"
     ... )
@@ -590,7 +590,7 @@ class BatchPrefillWithSharedPrefixPagedKVCacheWrapper:
     ...     q, k_shared, v_shared, kv_data, causal=True
     ... )
     ...     outputs.append(o)
-    ...
+    ...
     >>> # clear auxiliary data structures
     >>> prefill_wrapper.end_forward()
     >>> outputs[0].shape
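
For context, `merge_states` combines attention states computed over disjoint KV segments, per the recursive-attention tutorial the docstrings link to. A minimal sketch, assuming the inputs stack the per-segment states along dim 1 (the docstring above only pins down the output shapes):

# Sketch of calling merge_states; the input layout (num_states on dim 1)
# is an assumption inferred from the docstring, not stated in this diff.
import torch
import flashinfer

seq_len, num_states, num_heads, head_dim = 128, 4, 32, 128
v = torch.randn(seq_len, num_states, num_heads, head_dim,
                dtype=torch.float16, device="cuda:0")
s = torch.randn(seq_len, num_states, num_heads,
                dtype=torch.float32, device="cuda:0")
V, S = flashinfer.merge_states(v, s)
print(V.shape)  # torch.Size([128, 32, 128]) -- [seq_len, num_heads, head_dim]
print(S.shape)  # torch.Size([128, 32])      -- [seq_len, num_heads]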

python/flashinfer/decode.py

Lines changed: 5 additions & 5 deletions
@@ -179,7 +179,7 @@ def batch_decode_with_padded_kv_cache(
     -------
     torch.Tensor
         The attention output, shape: ``[batch_size, num_qo_heads, head_dim]``.
-
+
     Examples
     --------
     >>> import torch
@@ -270,7 +270,7 @@ def batch_decode_with_padded_kv_cache_return_lse(
         The attention output, shape: [batch_size, num_qo_heads, head_dim]
     S : torch.Tensor
         The logsumexp of attention scores, Shape: [batch_size, num_qo_heads]
-
+
     Examples
     --------
     >>> import torch
@@ -342,7 +342,7 @@ class BatchDecodeWithPagedKVCacheWrapper:
     ...     workspace_buffer, "NHD"
     ... )
     >>> batch_size = 7
-    >>> kv_page_indices = torch.arange(max_num_pages).int().to("cuda:0")
+    >>> kv_page_indices = torch.arange(max_num_pages).int().to("cuda:0")
     >>> kv_page_indptr = torch.tensor(
     ...     [0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32, device="cuda:0"
     ... )
@@ -374,7 +374,7 @@ class BatchDecodeWithPagedKVCacheWrapper:
     ...     # compute batch decode attention, reuse auxiliary data structures for all layers
     ...     o = decode_wrapper.forward(q, kv_data)
     ...     outputs.append(o)
-    ...
+    ...
     >>> # clear auxiliary data structures
     >>> decode_wrapper.end_forward()
     >>> outputs[0].shape
@@ -589,7 +589,7 @@ def forward_return_lse(
         The attention output, shape: ``[batch_size, num_qo_heads, head_dim]``.
     S : torch.Tensor
         The logsumexp of attention scores, Shape: ``[batch_size, num_qo_heads]``.
-
+
     Notes
     -----
     Please refer to the :ref:`tutorial <recursive-attention>` for a detailed
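
All the wrapper doctests follow one lifecycle: allocate a workspace buffer, call begin_forward once per batch to build the auxiliary data structures, reuse them via forward for every layer, then release them with end_forward. A condensed sketch using the doctest's page-table values; the begin_forward argument order and the last-page lengths here are assumptions, so treat the full doctest in decode.py as authoritative:

# Condensed wrapper lifecycle; begin_forward's exact signature is assumed.
import torch
import flashinfer

num_layers, num_qo_heads, num_kv_heads, head_dim = 32, 64, 8, 128
max_num_pages, page_size, batch_size = 128, 16, 7

workspace_buffer = torch.empty(128 * 1024 * 1024, dtype=torch.uint8, device="cuda:0")
decode_wrapper = flashinfer.BatchDecodeWithPagedKVCacheWrapper(workspace_buffer, "NHD")

kv_page_indices = torch.arange(max_num_pages).int().to("cuda:0")
kv_page_indptr = torch.tensor(
    [0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32, device="cuda:0"
)
kv_last_page_len = torch.tensor(  # one entry per request; values illustrative
    [1, 7, 14, 4, 3, 1, 16], dtype=torch.int32, device="cuda:0"
)

# Build the auxiliary data structures once per batch ...
decode_wrapper.begin_forward(
    kv_page_indptr, kv_page_indices, kv_last_page_len,
    num_qo_heads, num_kv_heads, head_dim, page_size,
)
outputs = []
for _ in range(num_layers):
    q = torch.randn(batch_size, num_qo_heads, head_dim,
                    dtype=torch.float16, device="cuda:0")
    kv_data = torch.randn(max_num_pages, 2, page_size, num_kv_heads, head_dim,
                          dtype=torch.float16, device="cuda:0")
    outputs.append(decode_wrapper.forward(q, kv_data))  # ... reuse them per layer
decode_wrapper.end_forward()  # clear auxiliary data structures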

python/flashinfer/page.py

Lines changed: 3 additions & 3 deletions
@@ -67,7 +67,7 @@ def append_paged_kv_cache(
         shape: ``[batch_size]``.
     kv_layout : str
         The layout of the paged kv-cache, either ``NHD`` or ``HND``.
-
+
     Example
     -------
     >>> import torch
@@ -96,7 +96,7 @@ def append_paged_kv_cache(
     >>> # 25 = (2 - 1) * 16 + 9
     >>> # 22 = (2 - 1) * 16 + 6
     >>> kv_last_page_len = torch.tensor([13, 8, 9, 6], dtype=torch.int32, device="cuda:0")
-    >>>
+    >>>
     >>> flashinfer.append_paged_kv_cache(
     ...     k_append,
     ...     v_append,
@@ -111,7 +111,7 @@ def append_paged_kv_cache(
     -----
     Please refer to the :ref:`tutorial <recursive-attention>` for a detailed
     explanation of the log-sum-exp function and attention states.
-
+
     The function assumes that the space for appended k/v have already been allocated,
     which means :attr:`kv_indices`, :attr:`kv_indptr`, :attr:`kv_last_page_len` has
     incorporated appended k/v.
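
The `25 = (2 - 1) * 16 + 9` comments in the doctest encode the page-table arithmetic: each request's KV length is `(num_pages - 1) * page_size + last_page_len`. A small worked check (the page counts for the first two requests are assumptions; the last two follow from the comments above):

# seq_len = (num_pages - 1) * page_size + last_page_len, with page_size = 16
page_size = 16
num_pages = [3, 1, 2, 2]        # pages per request; first two values assumed
last_page_len = [13, 8, 9, 6]   # kv_last_page_len from the doctest
seq_lens = [(n - 1) * page_size + l for n, l in zip(num_pages, last_page_len)]
print(seq_lens)  # [45, 8, 25, 22] -- 25 and 22 match the doctest comments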

python/flashinfer/prefill.py

Lines changed: 3 additions & 3 deletions
@@ -277,7 +277,7 @@ class BatchPrefillWithPagedKVCacheWrapper:
     >>> qo_indptr = torch.tensor(
     ...     [0, 33, 44, 55, 66, 77, 88, nnz_qo], dtype=torch.int32, device="cuda:0"
     ... )
-    >>> paged_kv_indices = torch.arange(max_num_pages).int().to("cuda:0")
+    >>> paged_kv_indices = torch.arange(max_num_pages).int().to("cuda:0")
     >>> paged_kv_indptr = torch.tensor(
     ...     [0, 17, 29, 44, 48, 66, 100, 128], dtype=torch.int32, device="cuda:0"
     ... )
@@ -308,7 +308,7 @@ class BatchPrefillWithPagedKVCacheWrapper:
     ...     q, kv_data, causal=True
     ... )
     ...     outputs.append(o)
-    ...
+    ...
     >>> # clear auxiliary data structures
     >>> prefill_wrapper.end_forward()
     >>> outputs[0].shape
@@ -582,7 +582,7 @@ class BatchPrefillWithRaggedKVCacheWrapper:
     ...     q, k, v, causal=True
     ... )
     ...     outputs.append(o)
-    ...
+    ...
     >>> # clear auxiliary data structures
     >>> prefill_wrapper.end_forward()
     >>> outputs[0].shape
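
In the prefill doctests, queries are packed into a ragged layout: `nnz_qo` is the total number of query tokens in the batch, and `qo_indptr[i]:qo_indptr[i + 1]` delimits request `i`'s rows. A short sketch of that indexing (the concrete `nnz_qo`, head count, and head_dim values are placeholders; the slicing is an illustration, not a library call):

# Request i owns rows qo_indptr[i]:qo_indptr[i + 1] of the packed query tensor.
import torch

nnz_qo = 100  # placeholder total; the doctest defines its own nnz_qo
qo_indptr = torch.tensor([0, 33, 44, 55, 66, 77, 88, nnz_qo], dtype=torch.int32)
q = torch.randn(nnz_qo, 64, 128)  # [nnz_qo, num_qo_heads, head_dim]

q_req2 = q[qo_indptr[2] : qo_indptr[3]]  # rows 44..54 belong to request 2
print(q_req2.shape)  # torch.Size([11, 64, 128])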
