From 87157161d42af631471156a888f16c9a9b7f4bc1 Mon Sep 17 00:00:00 2001 From: ShiYu <65760973+shiyu-coder@users.noreply.github.com> Date: Tue, 16 Sep 2025 10:35:14 +0800 Subject: [PATCH] Bug fix --- model/module.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/model/module.py b/model/module.py index d79a5b1..72c0619 100644 --- a/model/module.py +++ b/model/module.py @@ -370,7 +370,8 @@ class MultiHeadAttentionWithRoPE(nn.Module): q, k, v, attn_mask=attn_mask, dropout_p=self.attn_dropout_p, - is_causal=True + is_causal=True, + training=self.training ) attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, self.d_model) @@ -577,3 +578,4 @@ class TemporalEmbedding(nn.Module): +