# 最新干货！Transformer文本分类代码 - 知乎

03/19 20:07

https://zhuanlan.zhihu.com/p/105036982

https://github.com/lyeoni/nlp-tutorial/tree/master/text-classification-transformer

Transformer模型(基于论文《Attention is All You Need》)遵循与标准序列模型相同的一般模式，即从一个序列到另一个序列的注意力模型。

def get_sinusoid_table(self, seq_len, d_model):
    """Build the sinusoidal positional-encoding table from 'Attention Is All You Need'.

    Args:
        seq_len: number of positions (rows of the table).
        d_model: embedding dimension (columns of the table).

    Returns:
        torch.FloatTensor of shape (seq_len, d_model): even columns hold
        sin(pos / 10000^(2i/d_model)), odd columns hold the matching cos.
    """
    def get_angle(pos, i, d_model):
        # Paired dimensions (2i, 2i+1) share one frequency via i // 2.
        return pos / np.power(10000, (2 * (i // 2)) / d_model)

    sinusoid_table = np.zeros((seq_len, d_model))
    for pos in range(seq_len):
        for i in range(d_model):
            if i % 2 == 0:
                sinusoid_table[pos, i] = np.sin(get_angle(pos, i, d_model))
            else:
                sinusoid_table[pos, i] = np.cos(get_angle(pos, i, d_model))

    # NOTE(review): the scraped article dropped the return statement; the
    # upstream tutorial returns a FloatTensor so the table can seed
    # nn.Embedding.from_pretrained — confirm against the repo.
    return torch.FloatTensor(sinusoid_table)

• Pointwise feed forward networks

Multi-head attention consists of four parts:

1. Linear layers and split into heads
2. Scaled dot-product attention
3. Concatenation of heads
4. Final linear layer

Pointwise feed forward networks

class PositionWiseFeedForwardNetwork(nn.Module):
    """Position-wise feed-forward sublayer of the Transformer encoder.

    Applies the same two-layer MLP independently at every sequence
    position: FFN(x) = linear2(relu(linear1(x))).
    """

    def __init__(self, d_model, d_ff):
        super(PositionWiseFeedForwardNetwork, self).__init__()

        self.linear1 = nn.Linear(d_model, d_ff)   # expand: d_model -> d_ff
        self.linear2 = nn.Linear(d_ff, d_model)   # project back: d_ff -> d_model
        self.relu = nn.ReLU()

    def forward(self, inputs):
        # |inputs| : (batch_size, seq_len, d_model)

        output = self.relu(self.linear1(inputs))
        # |output| : (batch_size, seq_len, d_ff)
        output = self.linear2(output)
        # |output| : (batch_size, seq_len, d_model)
        return output

class EncoderLayer(nn.Module):
    """One Transformer encoder layer (post-norm variant).

    Self-attention followed by a position-wise feed-forward network,
    each sublayer wrapped in dropout, a residual connection, and
    LayerNorm, as in 'Attention Is All You Need'.
    """

    def __init__(self, d_model, n_heads, p_drop, d_ff):
        super(EncoderLayer, self).__init__()

        # NOTE(review): this line was lost in the scraped article — self.mha
        # is used in forward() but never assigned. The upstream tutorial
        # constructs the self-attention sublayer here; confirm the
        # MultiHeadAttention signature against the repo.
        self.mha = MultiHeadAttention(d_model, n_heads)
        self.dropout1 = nn.Dropout(p_drop)
        self.layernorm1 = nn.LayerNorm(d_model, eps=1e-6)

        self.ffn = PositionWiseFeedForwardNetwork(d_model, d_ff)
        self.dropout2 = nn.Dropout(p_drop)
        self.layernorm2 = nn.LayerNorm(d_model, eps=1e-6)

    # NOTE(review): the forward signature was also lost in scraping; it is
    # reconstructed from the shape comments below, which reference both
    # inputs and attn_mask.
    def forward(self, inputs, attn_mask):
        # |inputs| : (batch_size, seq_len, d_model)
        # |attn_mask| : (batch_size, seq_len, seq_len)

        # Self-attention: query, key and value are all the layer input.
        attn_outputs, attn_weights = self.mha(inputs, inputs, inputs, attn_mask)
        attn_outputs = self.dropout1(attn_outputs)
        attn_outputs = self.layernorm1(inputs + attn_outputs)   # residual + norm
        # |attn_outputs| : (batch_size, seq_len(=q_len), d_model)
        # |attn_weights| : (batch_size, n_heads, q_len, k_len)

        ffn_outputs = self.ffn(attn_outputs)
        ffn_outputs = self.dropout2(ffn_outputs)
        ffn_outputs = self.layernorm2(attn_outputs + ffn_outputs)   # residual + norm
        # |ffn_outputs| : (batch_size, seq_len, d_model)

        return ffn_outputs, attn_weights

### References

• Attention Is All You Need
• SEQUENCE-TO-SEQUENCE MODELING WITH NN.TRANSFORMER AND TORCHTEXT
• Transformer model for language understanding

0
0 收藏

0 评论
0 收藏
0