stanford-futuredata/megablocks-v0.3.2.zip
stanford-futuredata-megablocks-aa67416
.gitignore
73 Bytes
.gitmodules
132 Bytes
Dockerfile
239 Bytes
LICENSE
22.1 kB
MANIFEST.in
54 Bytes
Makefile
181 Bytes
README.md
2.8 kB
csrc
cuda_util.h
1.3 kB
cumsum.h
4.6 kB
histogram.h
2.7 kB
indices.h
3.1 kB
ops.cu
745 Bytes
replicate.h
6.4 kB
sort.h
2.7 kB
docker.sh
195 Bytes
exp
dmoe
dmoe_125m_8gpu.sh
3.4 kB
dmoe_356m_8gpu.sh
3.4 kB
dmoe_46m_8gpu.sh
3.4 kB
dmoe_760m_8gpu.sh
3.4 kB
gpt2
gpt2_125m_1gpu.sh
2.8 kB
gpt2_125m_8gpu.sh
3.0 kB
gpt2_1315m_1gpu.sh
2.8 kB
gpt2_1315m_8gpu.sh
3.0 kB
gpt2_356m_1gpu.sh
2.8 kB
gpt2_356m_8gpu.sh
3.0 kB
gpt2_46m_1gpu.sh
2.8 kB
gpt2_46m_8gpu.sh
3.0 kB
gpt2_760m_1gpu.sh
2.8 kB
gpt2_760m_8gpu.sh
3.0 kB
moe
moe_125m_8gpu.sh
3.4 kB
moe_356m_8gpu.sh
3.4 kB
moe_46m_8gpu.sh
3.4 kB
media
dropping_end_to_end.png
1.6 MB
megablocks
__init__.py
59 Bytes
backend
__init__.py
1 Byte
kernels.py
16.5 kB
benchmark_util.py
817 Bytes
grouped_gemm_util.py
488 Bytes
layers
__init__.py
1 Byte
all_to_all.py
1.4 kB
arguments.py
2.5 kB
common.py
606 Bytes
dmoe.py
10.3 kB
dmoe_test.py
6.6 kB
gelu.py
1.2 kB
memory_test.py
2.9 kB
memory_test.sh
246 Bytes
mlp.py
20.2 kB
moe.py
16.8 kB
moe_test.py
4.4 kB
mpu.py
3.1 kB
parallelism_test.py
4.5 kB
parallelism_test.sh
251 Bytes
router.py
2.2 kB
testing.py
1.0 kB
weight_parallel.py
11.4 kB
ops
__init__.py
707 Bytes
all_to_all_benchmark.py
1.5 kB
all_to_all_benchmark.sh
252 Bytes
binned_gather.py
736 Bytes
binned_gather_test.py
2.0 kB
binned_scatter.py
1.2 kB
binned_scatter_test.py
2.4 kB
cumsum.py
1.2 kB
cumsum_test.py
1.1 kB
gather.py
755 Bytes
histogram.py
515 Bytes
histogram_benchmark.py
2.1 kB
histogram_test.py
2.4 kB
matmul_benchmark.py
11.5 kB
padded_gather.py
860 Bytes
padded_gather_test.py
2.7 kB
padded_scatter.py
2.7 kB
padded_scatter_benchmark.py
1.7 kB
padded_scatter_test.py
4.0 kB
permute_benchmark.py
4.8 kB
repeat.py
122 Bytes
replicate.py
961 Bytes
replicate_test.py
3.1 kB
round_up.py
332 Bytes
scatter.py
2.4 kB
sort.py
775 Bytes
sort_benchmark.py
2.2 kB
sort_test.py
1.8 kB
sum.py
120 Bytes
topology.py
944 Bytes
topology_test.py
2.3 kB
turbo_util.py
483 Bytes
requirements.txt
177 Bytes
setup.py
1.5 kB
third_party
Megatron-LM
yamls
matmul_benchmark.yaml
694 Bytes
triton_benchmark.yaml
427 Bytes