FP8 block-scale MoE operation. The op covers expert routing and two grouped GEMMs.
## Axes

| Axis | Size |
|---|---|
| seq_len | var |
| num_experts | 256 |
| num_local_experts | 32 |
| hidden_size | 7168 |
| intermediate_size | 2048 |
| gemm1_out_size | 4096 |
| num_hidden_blocks | 56 |
| num_intermediate_blocks | 16 |
| num_gemm1_out_blocks | 32 |
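The three `*_blocks` axes are the corresponding sizes divided by what appears to be a 128-wide scaling block (7168 / 56 = 2048 / 16 = 4096 / 32 = 128). A quick sanity check, with the 128 block width as an assumption rather than something the spec states:

```python
# Each *_blocks axis equals its size divided by an assumed 128-wide scaling block.
BLOCK = 128
assert 7168 // BLOCK == 56  # hidden_size       -> num_hidden_blocks
assert 2048 // BLOCK == 16  # intermediate_size -> num_intermediate_blocks
assert 4096 // BLOCK == 32  # gemm1_out_size    -> num_gemm1_out_blocks
```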
## Signature

### Inputs
| Name | Type | Shape |
|---|---|---|
| routing_logits | float32 | [seq_len, num_experts] |
| routing_bias | bfloat16 | [num_experts] |
| hidden_states | float8_e4m3fn | [seq_len, hidden_size] |
| hidden_states_scale | float32 | [num_hidden_blocks, seq_len] |
| gemm1_weights | float8_e4m3fn | [num_local_experts, gemm1_out_size, hidden_size] |
| gemm1_weights_scale | float32 | [num_local_experts, num_gemm1_out_blocks, num_hidden_blocks] |
| gemm2_weights | float8_e4m3fn | [num_local_experts, hidden_size, intermediate_size] |
| gemm2_weights_scale | float32 | [num_local_experts, num_hidden_blocks, num_intermediate_blocks] |
| local_expert_offset | int32 | scalar |
| routed_scaling_factor | float32 | scalar |
### Outputs
| Name | Type | Shape |
|---|---|---|
| output | bfloat16 | [seq_len, hidden_size] |
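The scale shapes imply per-block dequantization. Assuming the 128-wide scaling blocks noted above, a gemm1 weight entry for local expert $e$ would dequantize as

$$
\hat{W}_1^{(e)}[i, j] = W_1^{(e)}[i, j] \cdot S_1^{(e)}\!\left[\left\lfloor \tfrac{i}{128} \right\rfloor, \left\lfloor \tfrac{j}{128} \right\rfloor\right],
$$

while hidden_states dequantize per (hidden-block, token), matching the transposed `[num_hidden_blocks, seq_len]` scale layout:

$$
\hat{x}[t, j] = x[t, j] \cdot s\!\left[\left\lfloor \tfrac{j}{128} \right\rfloor, t\right].
$$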
## Reference Implementation
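The reference implementation lives in the interactive editor and is not reproduced here. As an illustration only, below is a minimal PyTorch sketch of the op's semantics under explicit assumptions that the spec does not confirm: 128-wide scaling blocks, `TOP_K = 8` experts per token, sigmoid-plus-bias routing scores, and a SwiGLU activation between the two GEMMs (consistent with `gemm1_out_size == 2 * intermediate_size`). Each assumption is also marked in the code.

```python
# Minimal PyTorch sketch of the FP8 block-scale MoE op. Illustrative only;
# routing details and the activation are assumptions, not spec-confirmed.
import torch

BLOCK = 128  # ASSUMPTION: scale block width (7168 / 56 == 2048 / 16 == 128)
TOP_K = 8    # ASSUMPTION: experts activated per token; not given in the spec


def dequant_blockwise(w_fp8: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    """Apply per-[BLOCK x BLOCK] scales to an fp8 weight.

    w_fp8: [E, M, N] float8_e4m3fn; scale: [E, M // BLOCK, N // BLOCK] float32.
    """
    s = scale.repeat_interleave(BLOCK, dim=1).repeat_interleave(BLOCK, dim=2)
    return w_fp8.to(torch.float32) * s


def fp8_block_scale_moe(
    routing_logits, routing_bias, hidden_states, hidden_states_scale,
    gemm1_weights, gemm1_weights_scale, gemm2_weights, gemm2_weights_scale,
    local_expert_offset: int, routed_scaling_factor: float,
) -> torch.Tensor:
    seq_len, hidden_size = hidden_states.shape
    num_local_experts = gemm1_weights.shape[0]

    # Routing. ASSUMPTION: sigmoid scores plus bias, plain top-k, weights
    # renormalized and scaled by routed_scaling_factor.
    scores = torch.sigmoid(routing_logits.float()) + routing_bias.float()
    topk_scores, topk_ids = scores.topk(TOP_K, dim=-1)
    weights = topk_scores / topk_scores.sum(-1, keepdim=True) * routed_scaling_factor

    # Dequantize activations; hidden_states_scale is [num_hidden_blocks, seq_len].
    x_scale = hidden_states_scale.repeat_interleave(BLOCK, dim=0).T  # [seq, hidden]
    x = hidden_states.to(torch.float32) * x_scale

    w1 = dequant_blockwise(gemm1_weights, gemm1_weights_scale)
    w2 = dequant_blockwise(gemm2_weights, gemm2_weights_scale)

    out = torch.zeros(seq_len, hidden_size, dtype=torch.float32,
                      device=hidden_states.device)
    for e in range(num_local_experts):
        # Tokens routed to this local expert (global id = local id + offset).
        token, slot = torch.where(topk_ids == e + local_expert_offset)
        if token.numel() == 0:
            continue
        h1 = x[token] @ w1[e].T                       # grouped GEMM 1
        gate, up = h1.chunk(2, dim=-1)                # ASSUMPTION: SwiGLU split
        h2 = (torch.nn.functional.silu(gate) * up) @ w2[e].T  # grouped GEMM 2
        out.index_add_(0, token, h2 * weights[token, slot, None])
    return out.to(torch.bfloat16)
```

The sketch dequantizes everything to float32 and loops over experts for clarity; a real kernel would instead fold the per-block scales into the FP8 grouped GEMMs and batch the per-expert work.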
