• Multi-modal & multi-objective learning: VSN + Transformer


    Multi-modal: fuse features from structured (tabular) data and text data (or images, audio).

    Multi-objective: share the feature-processing part, then apply separate fully connected layers with activations to output multiple training targets.

    This article implements a network with both of these properties on TensorFlow 2.0; the code below is provided for reference.
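
    Schematically, the forward pass is two branches (tabular and text) fused into a shared representation, then split into two output heads. The toy model below shows just this pattern with stock Keras layers; the dimensions and names are illustrative, not taken from the implementation. The full implementation follows.

    import tensorflow as tf

    # Toy two-branch, two-head network: tabular branch + text branch,
    # fused, then one softmax head per training objective.
    tab_in = tf.keras.Input(shape=(16,))   # structured (tabular) features
    txt_in = tf.keras.Input(shape=(32,))   # padded token ids
    tab = tf.keras.layers.Dense(8, activation="relu")(tab_in)
    txt = tf.keras.layers.Embedding(input_dim=100, output_dim=8)(txt_in)
    txt = tf.keras.layers.GlobalAveragePooling1D()(txt)
    shared = tf.keras.layers.Concatenate()([tab, txt])                          # multi-modal fusion
    out1 = tf.keras.layers.Dense(2, activation="softmax", name="out1")(shared)  # objective 1
    out2 = tf.keras.layers.Dense(2, activation="softmax", name="out2")(shared)  # objective 2
    toy = tf.keras.Model([tab_in, txt_in], [out1, out2])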

    # -*- coding: utf-8 -*-
    """

    @Time  : 2022/7/29 16:21
    @Author: Breeze
    @File  : vsn+transformer.py
    """
    import pandas as pd
    import os
    import sys
    import json

    _WORK_DIR = os.path.split(os.path.realpath(__file__))[0]
    sys.path.append(os.path.join(_WORK_DIR, '..'))
    import time
    import numpy as np
    import tensorflow as tf
    import tensorflow.keras.backend as K
    from tensorflow.keras.layers import Layer

    from focal_loss import sparse_categorical_focal_loss

    seed = 2022
    np.random.seed(seed)
    tf.random.set_seed(seed)
    print(tf.__version__)

    # Plotting the model requires the GraphViz package.
    from tensorflow.keras.utils import plot_model

    # plot_model(NN, to_file='NN.png')

    def WEIGHT_COLUMN_NAME_map(x):
        """Binarize the second target: 1 if x >= 1, else 0."""
        return 1 if x >= 1 else 0

    # added_tokens.json maps token strings to ids in a tokenizer vocabulary.
    with open('added_tokens.json', encoding='utf-8') as f:
        kvs = json.load(f)


    def get_default_value(x):
        # Added-token ids start after the base vocabulary; 28995 appears to be
        # that offset, mapping them into the small index range used by the
        # text Embedding layer below. Unknown tokens map to 0.
        if kvs.get(x):
            return kvs.get(x) - 28995
        return 0


    class GatedLinearUnit(tf.keras.layers.Layer):
        def __init__(self, units):
            super(GatedLinearUnit, self).__init__()
            self.units = units
            self.linear = tf.keras.layers.Dense(units)
            self.sigmoid = tf.keras.layers.Dense(units, activation='sigmoid')

        def get_config(self):
            # get_config should return constructor arguments, not sub-layers
            # (the sub-layers are rebuilt in __init__ on deserialization)
            config = super(GatedLinearUnit, self).get_config()
            config["units"] = self.units
            return config

        def call(self, inputs, training=None):
            # GLU(x) = (W1 x + b1) * sigmoid(W2 x + b2): the sigmoid branch gates the linear branch
            return self.linear(inputs) * self.sigmoid(inputs)
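
    # Illustrative sanity check (commented out): the GLU output width equals `units`
    # glu = GatedLinearUnit(4)
    # print(glu(tf.zeros([2, 3])).shape)  # -> (2, 4)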


    class GatedResidualNetwork(tf.keras.layers.Layer):
        def __init__(self, units, dropout_rate):
            super(GatedResidualNetwork, self).__init__()
            self.units = units
            self.dropout_rate = dropout_rate
            self.elu_dense = tf.keras.layers.Dense(units, activation='elu')
            self.linear_dense = tf.keras.layers.Dense(units)
            self.dropout = tf.keras.layers.Dropout(dropout_rate)
            self.gated_linear_unit = GatedLinearUnit(units)
            self.layer_norm = tf.keras.layers.LayerNormalization()
            self.project = tf.keras.layers.Dense(units)

        def get_config(self):
            # Return the (serializable) constructor arguments, not sub-layers
            config = super(GatedResidualNetwork, self).get_config()
            config["units"] = self.units
            config["dropout_rate"] = self.dropout_rate
            return config

        def call(self, inputs, training=None):
            x = self.elu_dense(inputs)
            x = self.linear_dense(x)
            x = self.dropout(x, training=training)
            # Project the skip branch only when the input width differs from
            # `units`, so the residual shapes match.
            if inputs.shape[-1] != self.units:
                inputs = self.project(inputs)
            x = inputs + self.gated_linear_unit(x)

            x = self.layer_norm(x)
            return x
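
    # In equation form (the Gated Residual Network of the Temporal Fusion
    # Transformer):
    #   GRN(x) = LayerNorm(a + GLU(Dropout(W2 · ELU(W1 · x + b1) + b2)))
    # where a = x, or a linear projection of x when the widths differ.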


    class VariableSelection(tf.keras.layers.Layer):

        def __init__(self,
                     category_features,
                     numeric_features,
                     encoding_size,
                     dropout_rate,
                     category_feature_vocabulary,
                     project_dim
                     ):
            super(VariableSelection, self).__init__()
            self.encoding_size = encoding_size
            self.dropout_rate = dropout_rate
            self.category_feature_vocabulary = category_feature_vocabulary
            self.project_dim = project_dim
            # Create an embedding layer for each categorical feature;
            # these layers are shared across objectives in multi-objective learning.
            self.category_nums = len(category_features)
            self.embeddings = []
            for feature_name in category_features:
                feature_value = category_feature_vocabulary[feature_name]
                print('building embedding', feature_name, len(feature_value) + 1, encoding_size)
                self.embeddings.append(
                    tf.keras.layers.Embedding(
                        input_dim=len(feature_value) + 1,
                        output_dim=encoding_size)
                )

            # Project each numeric feature to encoding_size with a linear transformation
            self.project_layers = []
            for feature_name in numeric_features:
                self.project_layers.append(tf.keras.layers.Dense(units=encoding_size))

            self.features = category_features + numeric_features
            self.category_features = category_features
            self.numeric_features = numeric_features

            num_features = len(category_features) + len(numeric_features)
            # Create a GRN for each feature independently
            self.grns = list()
            for idx in range(num_features):
                grn = GatedResidualNetwork(encoding_size, dropout_rate)
                self.grns.append(grn)

            # Create a GRN for the concatenation of all the features
            self.grn_concat = GatedResidualNetwork(encoding_size, dropout_rate)
            self.softmax = tf.keras.layers.Dense(units=num_features, activation="softmax")

            # Project the output to project_dim; a sigmoid alternative is kept for reference:
            # self.transfer_layer = tf.keras.layers.Dense(project_dim, activation="sigmoid")
            self.transfer_layer = tf.keras.layers.Dense(project_dim, activation="softmax")

        def get_config(self):
            # Return the (serializable) constructor arguments, not sub-layers
            config = super(VariableSelection, self).get_config()
            config["category_features"] = self.category_features
            config["numeric_features"] = self.numeric_features
            config["encoding_size"] = self.encoding_size
            config["dropout_rate"] = self.dropout_rate
            config["category_feature_vocabulary"] = self.category_feature_vocabulary
            config["project_dim"] = self.project_dim
            return config

        def call(self, inputs, training=None, mask=None):
            encoded_features = []
            # print('inputs:', inputs)
            for i, feature_name in enumerate(self.features):
                if feature_name in self.category_features:
                    feature_embedding = self.embeddings[i]
                    input_feature = inputs[:, i]
                    # print('encoded_feature', i, feature_name, feature_embedding.input_dim,feature_embedding.output_dim)
                    encoded_feature = feature_embedding(input_feature)
                else:
                    encoded_feature = tf.expand_dims(inputs[:, i], -1)
                    encoded_feature = self.project_layers[i - self.category_nums](encoded_feature)
                encoded_features.append(encoded_feature)

            v = tf.keras.layers.concatenate(encoded_features)
            # effectively an attention-style fusion network over the features
            v = self.grn_concat(v, training=training)
            # append a trailing dimension: [batch, num_features] -> [batch, num_features, 1]
            v = tf.expand_dims(self.softmax(v), axis=-1)

            x = []
            for idx, feature in enumerate(encoded_features):
                x.append(self.grns[idx](feature, training=training))

            x = tf.stack(x, axis=1)  # [batch, num_features, encoding_size]
            # weighted sum over features -> output shape [batch, encoding_size]
            outputs = tf.squeeze(tf.matmul(v, x, transpose_a=True), axis=1)
            # project to project_dim
            outputs = self.transfer_layer(outputs)
            return outputs
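
    # Illustrative usage with a toy feature spec (commented out; categorical
    # columns come first in the input matrix):
    # vs = VariableSelection(category_features=['c1'], numeric_features=['n1'],
    #                        encoding_size=4, dropout_rate=0.1,
    #                        category_feature_vocabulary={'c1': ['a', 'b']},
    #                        project_dim=2)
    # print(vs(tf.constant([[1., 0.3], [0., -1.2]])).shape)  # -> (2, 2)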


    class MultiHeadSelfAttention(tf.keras.layers.Layer):
        """多头Attention"""
        def __init__(self, embed_dim, num_heads=8):
            super(MultiHeadSelfAttention, self).__init__()
            self.embed_dim = embed_dim
            self.num_heads = num_heads
            if embed_dim % num_heads != 0:
                raise ValueError(
                    f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
                )
            self.projection_dim = embed_dim // num_heads
            self.query_dense = tf.keras.layers.Dense(embed_dim)
            self.key_dense = tf.keras.layers.Dense(embed_dim)
            self.value_dense = tf.keras.layers.Dense(embed_dim)
            self.combine_heads = tf.keras.layers.Dense(embed_dim)
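
        # attention() below is scaled dot-product attention:
        #   Attention(Q, K, V) = softmax(Q · K^T / sqrt(d_k)) · V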

        def attention(self, query, key, value):
            score = tf.matmul(query, key, transpose_b=True)
            dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
            scaled_score = score / tf.math.sqrt(dim_key)
            weights = tf.nn.softmax(scaled_score, axis=-1)
            output = tf.matmul(weights, value)
            return output, weights

        def separate_heads(self, x, batch_size):
            x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
            return tf.transpose(x, perm=[0, 2, 1, 3])

        def call(self, inputs):
            # x.shape = [batch_size, seq_len, embedding_dim]
            batch_size = tf.shape(inputs)[0]
            query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
            key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
            value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
            query = self.separate_heads(
                query, batch_size
            )  # (batch_size, num_heads, seq_len, projection_dim)
            key = self.separate_heads(
                key, batch_size
            )  # (batch_size, num_heads, seq_len, projection_dim)
            value = self.separate_heads(
                value, batch_size
            )  # (batch_size, num_heads, seq_len, projection_dim)
            attention, weights = self.attention(query, key, value)
            attention = tf.transpose(
                attention, perm=[0, 2, 1, 3]
            )  # (batch_size, seq_len, num_heads, projection_dim)
            concat_attention = tf.reshape(
                attention, (batch_size, -1, self.embed_dim)
            )  # (batch_size, seq_len, embed_dim)
            output = self.combine_heads(
                concat_attention
            )  # (batch_size, seq_len, embed_dim)
            return output
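
    # Illustrative shape check (commented out): self-attention preserves shape
    # mhsa = MultiHeadSelfAttention(embed_dim=8, num_heads=2)
    # print(mhsa(tf.zeros([2, 5, 8])).shape)  # -> (2, 5, 8)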


    class TransformerBlock(tf.keras.layers.Layer):
        """Transformer的Encoder部分"""
        def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
            super(TransformerBlock, self).__init__()
            self.att = MultiHeadSelfAttention(embed_dim, num_heads)
            self.ffn = tf.keras.Sequential(
                [tf.keras.layers.Dense(ff_dim, activation="relu"), tf.keras.layers.Dense(embed_dim), ]
            )
            self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
            self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
            self.dropout1 = tf.keras.layers.Dropout(rate)
            self.dropout2 = tf.keras.layers.Dropout(rate)

        def call(self, inputs, training):
            attn_output = self.att(inputs)
            attn_output = self.dropout1(attn_output, training=training)
            out1 = self.layernorm1(inputs + attn_output)
            ffn_output = self.ffn(out1)
            ffn_output = self.dropout2(ffn_output, training=training)
            return self.layernorm2(out1 + ffn_output)
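
    # Each block applies post-norm residual connections, as in the original
    # Transformer:
    #   out1 = LayerNorm(x + Dropout(SelfAttention(x)))
    #   out  = LayerNorm(out1 + Dropout(FFN(out1)))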


    class TokenAndPositionEmbedding(tf.keras.layers.Layer):
        """Transformer输入的编码层"""
        def __init__(self, maxlen, vocab_size, embed_dim):
            super(TokenAndPositionEmbedding, self).__init__()
            self.token_emb = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
            self.pos_emb = tf.keras.layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

        def call(self, x):
            maxlen = tf.shape(x)[-1]
            positions = tf.range(start=0, limit=maxlen, delta=1)
            positions = self.pos_emb(positions)
            x = self.token_emb(x)
            return x + positions
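
    # Positions 0..maxlen-1 are embedded with a learned table (rather than a
    # sinusoidal encoding) and added to the token embeddings, e.g.:
    # tpe = TokenAndPositionEmbedding(maxlen=16, vocab_size=100, embed_dim=8)
    # print(tpe(tf.zeros([2, 16])).shape)  # -> (2, 16, 8)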


    class Trainer(tf.keras.Model):

        def __init__(self, pretrain_path,
                     train_data_path,
                     test_data_path,
                     save_dir,
                     labels=[],
                     category_features=[],
                     numeric_features=[],
                     encoding_size=10,
                     dropout_rate=0.5,
                     category_feature_vocabulary={},
                     project_dim=10,
                     max_len=128
                     ):
            # Always call the parent constructor first.
            super(Trainer, self).__init__()
            self.max_len = max_len
            self.save_dir = save_dir
            self.embed_dim = 32  # Embedding size for each token
            self.num_heads = 2  # Number of attention heads
            self.ff_dim = 32  # Hidden layer size in feed forward network inside transformer
            self.vocab_size = 400
            self.optimizer = None

            def click_map(x):
                """Map a comma-separated token string to a padded id sequence."""
                words = str(x).split(',')
                words_idx = list(map(get_default_value, words))

                pad_sequences = tf.keras.preprocessing.sequence.pad_sequences(
                    [words_idx],
                    maxlen=max_len,
                    dtype='float64',
                    padding='post',
                    truncating='pre',
                    value=0.)

                return pad_sequences[0].reshape(1, max_len)
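
            # e.g. click_map("tok_a,tok_b") -> array of shape (1, max_len); ids come
            # from added_tokens.json, padded at the end, truncated from the front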

            # Load the data
            train_data = pd.read_csv(train_data_path)
            self.x_train = train_data[category_features + numeric_features].values
            self.x_train_txt = train_data['last_click'].apply(click_map).values
            # df1 = self.x_train_txt.apply(pd.Series, index=[f'col{i}' for i in range(max_len)])
            self.x_train_txt = np.array([e for e in self.x_train_txt]).reshape(len(train_data), max_len)
            self.x_train = np.concatenate([self.x_train, self.x_train_txt], axis=1)
            self.y_train = train_data[labels[0]].values
            self.y2_train = train_data[labels[1]].apply(WEIGHT_COLUMN_NAME_map).values
            # print(train_data.info())

            test_data = pd.read_csv(test_data_path)
            self.x_test = test_data[category_features + numeric_features].values
            self.x_test_txt = test_data['last_click'].apply(click_map).values
            self.x_test_txt = np.array([e for e in self.x_test_txt]).reshape(len(self.x_test_txt), max_len)
            self.x_test = np.concatenate([self.x_test, self.x_test_txt], axis=1)

            self.y_test = test_data[labels[0]].values
            self.y2_test = test_data[labels[1]].apply(WEIGHT_COLUMN_NAME_map).values
            # print(test_data.info())

            del train_data, test_data


            self.train_loss_1 = tf.keras.metrics.Mean(name='train_loss_1')
            self.train_auc_1 = tf.keras.metrics.AUC(name='train_auc_1')
            self.test_loss_1 = tf.keras.metrics.Mean(name='test_loss_1')
            self.test_auc_1 = tf.keras.metrics.AUC(name='test_auc_1')

            self.train_loss_2 = tf.keras.metrics.Mean(name='train_loss_2')
            self.train_auc_2 = tf.keras.metrics.AUC(name='train_auc_2')
            self.test_loss_2 = tf.keras.metrics.Mean(name='test_loss_2')
            self.test_auc_2 = tf.keras.metrics.AUC(name='test_auc_2')
            # Shared feature-extraction module (VSN over the tabular inputs)
            self.model = VariableSelection(category_features,
                                           numeric_features,
                                           encoding_size,
                                           dropout_rate,
                                           category_feature_vocabulary,
                                           project_dim)

            # Token + position encoding
            self.embedding_layer = TokenAndPositionEmbedding(self.max_len, self.vocab_size, embed_dim=self.embed_dim)
            self.transformer_block = TransformerBlock(embed_dim=self.embed_dim, num_heads=self.num_heads, ff_dim=self.ff_dim)
            # Network head for objective 1
            self.h11 = tf.keras.layers.Dense(units=16, activation="relu"
                                             , name='h11'
                                             , kernel_regularizer=tf.keras.regularizers.l1(0.01)
                                             , bias_regularizer=tf.keras.regularizers.l1(0.01)
                                             , activity_regularizer=tf.keras.regularizers.l1(0.01)
                                             )
            self.h12 = tf.keras.layers.Dense(8, activation='relu')
            self.h13 = tf.keras.layers.Dense(8, activation='relu')
            # sigmoid pairs with binary cross-entropy; softmax with sparse_categorical_focal_loss
            self.h14 = tf.keras.layers.Dense(project_dim, activation='softmax', name='out1')

            # Network head for objective 2
            self.h21 = tf.keras.layers.Dense(units=16, activation="relu"
                                             , name='h21'
                                             , kernel_regularizer=tf.keras.regularizers.l1(0.01)
                                             , bias_regularizer=tf.keras.regularizers.l1(0.01)
                                             , activity_regularizer=tf.keras.regularizers.l1(0.01)
                                             )
            self.h22 = tf.keras.layers.Dense(8, activation='relu')
            self.h23 = tf.keras.layers.Dense(8, activation='relu')
            self.h24 = tf.keras.layers.Dense(project_dim, activation='softmax', name='out2')

            # Hidden layers on top of the transformer output

            self.tf_l1 = tf.keras.layers.GlobalAveragePooling1D()
            self.tf_d1 = tf.keras.layers.Dropout(0.1)
            self.tf_l2 = tf.keras.layers.Dense(20, activation="relu")
            self.tf_d2 = tf.keras.layers.Dropout(0.1)
            self.tf_l3 = tf.keras.layers.Dense(10, activation="relu")

        def call(self, inputs, training=None, mask=None):
            # the last max_len columns carry the padded click-token sequence
            click_inputs = inputs[:, -self.max_len:]
            embeddings = self.embedding_layer(click_inputs)
            transformer_out = self.transformer_block(embeddings, training)

            transformer_out = self.tf_l1(transformer_out)
            transformer_out = self.tf_d1(transformer_out, training=training)
            transformer_out = self.tf_l2(transformer_out)
            transformer_out = self.tf_d2(transformer_out, training=training)
            transformer_out = self.tf_l3(transformer_out)

            # the remaining (leading) columns are the tabular features
            vsn_out = self.model(inputs[:, :-self.max_len], training, mask)
            out = tf.concat([transformer_out, vsn_out], axis=1)

            out1 = self.h11(out)
            out1 = self.h12(out1)
            out1 = self.h13(out1)
            out1 = self.h14(out1)

            out2 = self.h21(out)
            out2 = self.h22(out2)
            out2 = self.h23(out2)
            out2 = self.h24(out2)
            return out1, out2

        # @tf.function  # decorate to compile via autograph into a static graph for speed
        def train_step(self, x, y1, y2, steps):
            with tf.GradientTape() as tape:
                pred1, pred2 = self(x, training=True)
                # alternative loss: tf.keras.losses.sparse_categorical_crossentropy
                loss1 = sparse_categorical_focal_loss(y1, pred1, gamma=2)
                loss2 = sparse_categorical_focal_loss(y2, pred2, gamma=2)

            # Compute gradients of the summed multi-task loss
            gradients = tape.gradient(loss1 + loss2, self.trainable_variables,
                                      unconnected_gradients=tf.UnconnectedGradients.ZERO)

            # Update weights
            self.optimizer.apply_gradients(grads_and_vars=zip(gradients, self.trainable_variables))

            # Update metrics; column 1 of each softmax output is the positive-class probability
            self.train_loss_1.update_state(loss1)
            self.train_auc_1.update_state(y1, pred1[:, 1])

            self.train_loss_2.update_state(loss2)
            self.train_auc_2.update_state(y2, pred2[:, 1])
            return loss1 + loss2
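
        # The two task losses are summed with equal weight; a weighted sum
        # (e.g. alpha * loss1 + (1 - alpha) * loss2) is a common variant.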

        # @tf.function  # decorate to compile via autograph into a static graph for speed
        def test_step(self, x, y1, y2):
            pred1, pred2 = self(x, training=False)
            loss_1 = sparse_categorical_focal_loss(y1, pred1, gamma=2)
            loss_2 = sparse_categorical_focal_loss(y2, pred2, gamma=2)

            self.test_auc_1.update_state(y1, pred1[:, 1])
            self.test_loss_1.update_state(loss_1)
            self.test_auc_2.update_state(y2, pred2[:, 1])
            self.test_loss_2.update_state(loss_2)

        # @tf.function
        def train(self,
                  epochs,
                  batch_size=16,
                  lr=2e-5,
                  evaluation_steps=100
                  ):
            print('start train model ...')
            self.optimizer = tf.keras.optimizers.Adam(lr)

            train_data = tf.data.Dataset.from_tensor_slices((self.x_train, (self.y_train, self.y2_train))) \
                .shuffle(len(self.y_train)).batch(batch_size)
            test_data = tf.data.Dataset.from_tensor_slices((self.x_test, (self.y_test, self.y2_test))).batch(batch_size)

            def benchmark(dataset, num_epochs=2):
                """Rough input-pipeline throughput check (sleep stands in for a step)."""
                start_time = time.perf_counter()
                for epoch_num in range(num_epochs):
                    for sample in dataset:
                        time.sleep(0.01)  # simulate the cost of a training step
                print("Execution time:", time.perf_counter() - start_time)
            # benchmark(train_data, 20)
            # benchmark(test_data, 20)

            best_auc = 0.
            step = 0
            for epoch in range(epochs):
                self.train_loss_1.reset_states()
                self.train_loss_2.reset_states()
                self.train_auc_1.reset_states()
                self.train_auc_2.reset_states()

                start = time.time()
                cnt = 0
                for x, (y1, y2) in train_data:
                    cnt += 1
                    # print('cnt is:', cnt, x, y)
                    self.train_step(x, y1, y2, cnt)
                    step += 1
                    if step > 0 and step % (evaluation_steps // 5) == 0:
                        print("Epoch {} step {} train Loss1 {}  Loss2 {}".format(epoch + 1, step
                                                                                 , self.train_loss_1.result()
                                                                                 , self.train_loss_2.result()))
                        print("Epoch {} step {} train auc1 {}   auc2  {}".format(epoch + 1, step
                                                                                 , self.train_auc_1.result()
                                                                                 , self.train_auc_2.result()))
                    # evaluate
                    if step > 0 and step % evaluation_steps == 0:
                        self.test_auc_1.reset_states()
                        self.test_auc_2.reset_states()
                        self.test_loss_1.reset_states()
                        self.test_loss_2.reset_states()
                        # distinct loop variables so the outer training batch is not shadowed
                        for x_t, (y1_t, y2_t) in test_data:
                            self.test_step(x_t, y1_t, y2_t)
                        cur_auc = self.test_auc_1.result()
                        cur_auc2 = self.test_auc_2.result()
                        print("Epoch {} Step {} test loss1 {} loss2 {}".format(epoch + 1, step
                                                                               , self.test_loss_1.result()
                                                                               , self.test_loss_2.result()))
                        print("Epoch {} Step {} test AUC1 {}  AUC2 {}".format(epoch + 1, step, cur_auc, cur_auc2))
                        if cur_auc > best_auc:
                            best_auc = cur_auc
                            print("save model...")
                            # tf.saved_model.save(self, self.save_dir)
                        print("best auc {}".format(best_auc))
                # elapsed time for this epoch
                delta_t = time.time() - start
                h = int(delta_t // 3600)
                m = int((delta_t - 3600 * h) // 60)
                s = int(delta_t % 60)
                print("Epoch {} time elapsed: {}h-{}m-{}s".format(epoch + 1, h, m, s))


    if __name__ == '__main__':
        # Target feature name.
        TARGET_FEATURE_NAME = "target_label_2"
        # Weight column name.
        WEIGHT_COLUMN_NAME = "paid_installment_no"

        import argparse

        parser = argparse.ArgumentParser()
        parser.add_argument('--pretrained_path', default='./data/pretrained_model/vsn')
        parser.add_argument('--train_data_path', default='./train_data.csv')
        parser.add_argument('--test_data_path', default='./test_data.csv')
        parser.add_argument('--label', default=[TARGET_FEATURE_NAME, WEIGHT_COLUMN_NAME])

        parser.add_argument('--save_dir', default='./data/model')
        parser.add_argument('--lr', default=0.002, type=float)
        parser.add_argument('--batch_size', default=1024, type=int)
        parser.add_argument('--epochs', default=10, type=int)
        parser.add_argument('--evaluation_steps', default=500, type=int)
        parser.add_argument('--num_warmup_steps', default=20, type=int)
        parser.add_argument('--weight_decay_rate', default=0.01, type=float)

        args = parser.parse_args()
        top_n_obj_cols_a = {}
        with open('./data/obj_feature_value_A_biz.txt', encoding='utf-8') as f:
            for line in f.readlines():
                key, value_dict = line.split("|")
                kv = json.loads(value_dict.replace("'", "\""))
                top_n_obj_cols_a[key] = list(kv.values())
                # print(key,top_n_obj_cols[key])

        from data_util import digt_cols, obj_cols

        category_features = obj_cols
        numeric_features = digt_cols
        encoding_size = 10
        dropout_rate = 0.2
        category_feature_vocabulary = top_n_obj_cols_a
        project_dim = 2
        trainer = Trainer(args.pretrained_path,
                          train_data_path=args.train_data_path,
                          test_data_path=args.test_data_path,
                          save_dir=args.save_dir,
                          labels=args.label,
                          category_features=category_features,
                          numeric_features=numeric_features,
                          encoding_size=encoding_size,
                          dropout_rate=dropout_rate,
                          category_feature_vocabulary=category_feature_vocabulary,
                          project_dim=project_dim
                          )

        trainer.train(epochs=args.epochs, batch_size=args.batch_size, lr=args.lr, evaluation_steps=args.evaluation_steps)
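
    # Example invocation (paths are placeholders for your own data files):
    # python vsn+transformer.py --train_data_path ./train_data.csv \
    #     --test_data_path ./test_data.csv --batch_size 1024 --epochs 10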
     

• Original article: https://blog.csdn.net/mtj66/article/details/126356839