TSTransformerPlus(
  (backbone): _TSTransformerBackbone(
    (to_embedding): Sequential(
      (0): Transpose(1, 2)
      (1): Linear(in_features=4, out_features=128, bias=True)
    )
    (pos_dropout): Dropout(p=0.0, inplace=False)
    (encoder): _TransformerEncoder(
      (layers): ModuleList(
        (0-5): 6 x ModuleList(
          (0): MultiheadAttention(
            (W_Q): Linear(in_features=128, out_features=128, bias=False)
            (W_K): Linear(in_features=128, out_features=128, bias=False)
            (W_V): Linear(in_features=128, out_features=128, bias=False)
            (sdp_attn): ScaledDotProductAttention()
            (to_out): Sequential(
              (0): Linear(in_features=128, out_features=128, bias=True)
              (1): Dropout(p=0, inplace=False)
            )
          )
          (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (2): PositionwiseFeedForward(
            (0): Linear(in_features=128, out_features=256, bias=True)
            (1): ReGLU()
            (2): Dropout(p=0, inplace=False)
            (3): Linear(in_features=128, out_features=128, bias=True)
            (4): Dropout(p=0, inplace=False)
          )
          (3): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (4): Identity()
        )
      )
    )
  )
  (head): Sequential(
    (0): TokenLayer()
    (1): LinBnDrop(
      (0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=128, out_features=2, bias=False)
    )
  )
)
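For reference, the printed architecture can be approximated in plain PyTorch. The sketch below is an assumption-laden re-implementation, not the library's actual code: the number of attention heads (8 here), the residual connections, the norm placement, and the TokenLayer behaviour (taking the first token) are not visible in the repr and are guessed, `nn.MultiheadAttention` stands in for the custom W_Q/W_K/W_V attention shown above, and the positional embedding (a Parameter, so only its `pos_dropout` appears in the repr) is omitted. What the sketch does reproduce are the printed shapes: a 4-channel input embedded to d_model=128, six encoder blocks whose feed-forward path expands to 256 and is gated back to 128 by ReGLU, and a head with BatchNorm1d(128) followed by a bias-free Linear to 2 classes.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class ReGLU(nn.Module):
    """ReLU-gated linear unit: splits the last dim in half and gates one half with the other."""
    def forward(self, x):
        a, b = x.chunk(2, dim=-1)              # 256 -> two halves of 128
        return a * F.relu(b)


class EncoderLayer(nn.Module):
    """One block from the dump: attention, LayerNorm, ReGLU feed-forward, LayerNorm."""
    def __init__(self, d_model=128, n_heads=8, d_ff=256, dropout=0.0):
        super().__init__()
        # stand-in for the custom MultiheadAttention (bias-free W_Q/W_K/W_V) in the dump
        self.attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.norm1 = nn.LayerNorm(d_model)
        self.ff = nn.Sequential(
            nn.Linear(d_model, d_ff),          # 128 -> 256
            ReGLU(),                           # 256 -> 128 (gating halves the width)
            nn.Dropout(dropout),
            nn.Linear(d_ff // 2, d_model),     # 128 -> 128, matching the dump
            nn.Dropout(dropout),
        )
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):                      # x: (batch, time, d_model)
        x = self.norm1(x + self.attn(x, x, x, need_weights=False)[0])  # residual assumed
        x = self.norm2(x + self.ff(x))
        return x


class TSTransformerSketch(nn.Module):
    """Rough, assumption-based re-implementation of the printed TSTransformerPlus."""
    def __init__(self, c_in=4, c_out=2, d_model=128, n_layers=6):
        super().__init__()
        self.to_embedding = nn.Linear(c_in, d_model)    # applied after Transpose(1, 2)
        self.layers = nn.ModuleList([EncoderLayer(d_model) for _ in range(n_layers)])
        self.head = nn.Sequential(                      # LinBnDrop equivalent
            nn.BatchNorm1d(d_model),
            nn.Linear(d_model, c_out, bias=False),
        )

    def forward(self, x):                      # x: (batch, channels=4, time)
        x = self.to_embedding(x.transpose(1, 2))        # (batch, time, 128)
        for layer in self.layers:
            x = layer(x)
        x = x[:, 0]                            # TokenLayer stand-in: keep the first token
        return self.head(x)                    # (batch, 2)


model = TSTransformerSketch()
out = model(torch.randn(8, 4, 96))             # e.g. 8 samples, 4 channels, 96 time steps
print(out.shape)                               # torch.Size([8, 2])
```

Note that ReGLU halves the feed-forward width, which is why the second Linear inside PositionwiseFeedForward takes 128 (= 256 / 2) input features rather than 256.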