• youtubeDNN模型实现2-网络模型结构


    1 embedding

    1.1 根据封装的特征创建embedding网络结构

    1.1.1 用户及物品特征封装

    # Feature configuration: per-feature vocabulary sizes and a shared embedding width.
    feature_max_idx={'user_id': 4, 'movie_id': 208, 'gender': 3, 'age': 4, 'occupation': 4, 'zip': 4}
    embedding_dim=16

    # User-side features: five single-value sparse fields ...
    user_feature_columns = [
        SparseFeat(name, feature_max_idx[name], embedding_dim)
        for name in ('user_id', 'gender', 'age', 'occupation', 'zip')
    ]
    # ... plus the watch-history sequence (max length 50, mean-pooled, length field 'hist_len'),
    # which shares the 'movie_id' embedding table via embedding_name.
    user_feature_columns.append(
        VarLenSparseFeat(
            SparseFeat('hist_movie_id', feature_max_idx['movie_id'], embedding_dim,
                       embedding_name="movie_id"),
            50, 'mean', 'hist_len'))

    # Item-side feature: the movie id alone.
    item_feature_columns = [SparseFeat('movie_id', feature_max_idx['movie_id'], embedding_dim)]
    

    1.1.2 获取单值离散特征和多值离散特征

    # Item-side metadata used later by EmbeddingIndex / SampledSoftmaxLayer.
    item_feature_name=item_feature_columns[0].name  # 'movie_id'
    item_vocabulary_size=item_feature_columns[0].vocabulary_size # 208
    feature_columns=user_feature_columns+item_feature_columns

    # Partition all columns into single-value sparse vs. variable-length sparse.
    # (feature_columns is a list concatenation above, so it is never empty/None here;
    # comprehensions over it yield [] for an empty input just like the filter form.)
    sparse_feature_columns = [fc for fc in feature_columns if isinstance(fc, SparseFeat)]
    varlen_sparse_feature_columns = [fc for fc in feature_columns if isinstance(fc, VarLenSparseFeat)]
    

    1.1.3 离散特征的embedding 结构构造

    # Build one Embedding layer per feature, keyed by embedding_name so that
    # features sharing an embedding table (hist_movie_id -> movie_id) collapse
    # onto a single entry.
    sparse_embedding_dic={}
    for col in sparse_feature_columns:
        print(col.embedding_name)
        sparse_embedding_dic[col.embedding_name] = keras.layers.Embedding(
            col.vocabulary_size, col.embedding_dim,
            name="sparse_emb_" + col.embedding_name,
            trainable=col.trainable)

    # Sequence features: same table shape, keyed by the inner sparse feat's embedding_name
    # (this overwrites the 'movie_id' entry with the sequence-named layer).
    for col in varlen_sparse_feature_columns:
        inner = col.sparsefeat
        print(inner.embedding_name)
        sparse_embedding_dic[inner.embedding_name] = keras.layers.Embedding(
            inner.vocabulary_size, inner.embedding_dim,
            name="sparse_seq_emb_" + inner.name,
            trainable=inner.trainable)
    
    print(sparse_embedding_dic)
    {'user_id': ,
     'gender': ,
     'age': ,
     'occupation': ,
     'zip': ,
     'movie_id': }
    

    2 input

    2.1 构造用户侧输入

    # One Keras Input per user-side feature column:
    #   SparseFeat       -> (batch, 1) id
    #   DenseFeat        -> (batch, dimension) float vector
    #   VarLenSparseFeat -> (batch, maxlen) padded id sequence
    input_features = {}
    for feat in user_feature_columns:

        if isinstance(feat,SparseFeat):
            input_features[feat.name]=keras.layers.Input(shape=(1,),name=feat.name,dtype=feat.dtype)
        elif isinstance(feat,DenseFeat):
            input_features[feat.name]= keras.layers.Input(shape=(feat.dimension,),name=feat.name,dtype=feat.dtype)
        elif isinstance(feat,VarLenSparseFeat):
            input_features[feat.sparsefeat.name]= keras.layers.Input(shape=(feat.maxlen,),name=feat.sparsefeat.name,dtype=feat.sparsefeat.dtype)
        else :
            # BUG FIX: the loop variable is `feat`, not `fc` — the original raised
            # NameError here instead of the intended TypeError on unknown column types.
            raise TypeError("Invalid feature column type ,got",type(feat))
    
    print(input_features )
    {'user_id': ,
     'gender': ,
     'age': ,
     'occupation': ,
     'zip': ,
     'hist_movie_id': }
    

    3 Input 结合 embedding层

    3.1 embedding(input)

    input_features 为每个特征的输入形式,
    sparse_embedding_dic为每个特征的 embedding结构
    单个特征embedding 输出为 sparse_embedding_dic[embedding_name](input_features[feature_name])

    # User-side column split: single-value sparse vs. variable-length sparse.
    user_sparse_feature_columns = list(filter(lambda x: isinstance(x, SparseFeat), user_feature_columns)) if user_feature_columns else []

    user_varlen_sparse_feature_columns = list(filter(lambda x: isinstance(x, VarLenSparseFeat), user_feature_columns)) if user_feature_columns else []

    embedding_dict = {}
    # embedding_lookup: run each single-value Input through its Embedding layer,
    # output sparse_embedding_dic[embedding_name](input_features[feature_name]).
    for fc in user_sparse_feature_columns:
        feature_name = fc.name
        embedding_name = fc.embedding_name
        print(feature_name)
        # BUG FIX: this line was indented with a stray tab mixed into the spaces
        # in the original, which is a TabError under Python 3.
        embedding_dict[feature_name] = sparse_embedding_dic[embedding_name](input_features[feature_name])

    # varlen_embedding_lookup: sequence Inputs use the table keyed by the inner
    # sparse feat's embedding_name (shared with 'movie_id').
    for fc in user_varlen_sparse_feature_columns:
        feature_name = fc.sparsefeat.name
        embedding_name = fc.sparsefeat.embedding_name
        print(feature_name)
        embedding_dict[feature_name] = sparse_embedding_dic[embedding_name](input_features[feature_name])
    
    
    print(embedding_dict)
    {'user_id': ,
     'gender': ,
     'age': ,
     'occupation': ,
     'zip': ,
     'hist_movie_id': }
    

    3.2 测试embedding(input)

    # Sanity check: wire the user_id Input to its embedding lookup and run a
    # batch of two ids through it (expected output shape: (2, 1, 16)).
    modelTest = keras.models.Model(inputs=input_features["user_id"],
                                   outputs=embedding_dict["user_id"])
    print(modelTest(tf.Variable([[1],[2]])))
    print("\n*************************************\n")

    modelTest.summary()
    
    
    Tensor("model_14/sparse_emb_user_id/embedding_lookup/Identity_1:0", shape=(2, 1, 16), dtype=float32)
    
    *************************************
    
    Model: "model_14"
    _________________________________________________________________
    Layer (type)                 Output Shape              Param #   
    =================================================================
    user_id (InputLayer)         [(None, 1)]               0         
    _________________________________________________________________
    sparse_emb_user_id (Embeddin (None, 1, 16)             64        
    =================================================================
    Total params: 64
    Trainable params: 64
    Non-trainable params: 0
    _________________________________________________________________
    

    3.3 SequencePoolingLayer

    多值特征embedding后进行pooling

    class SequencePoolingLayer(keras.layers.Layer):
        """Pool a sequence of embeddings (batch, seq_len, dim) down to (batch, 1, dim).

        BUG FIX: the original accepted ``mode``/``supports_masking`` but discarded
        both and always mean-pooled. ``mode`` ('mean' default, 'sum', 'max') is now
        honored; the default behavior is unchanged.
        NOTE(review): padding positions are still included in the pooling — no
        mask handling yet; confirm whether that matters for short histories.
        """
        def __init__(self, mode='mean', supports_masking=False, **kwargs):
            if mode not in ('mean', 'sum', 'max'):
                raise ValueError("mode must be 'mean', 'sum' or 'max', got %r" % (mode,))
            super(SequencePoolingLayer, self).__init__(**kwargs)
            self.mode = mode
            self.supports_masking = supports_masking

        def call(self, inputs):
            # Reduce over the sequence axis, then restore a singleton axis so the
            # result concatenates with the (batch, 1, dim) single-value embeddings.
            if self.mode == 'mean':
                pooled = tf.reduce_mean(inputs, axis=1)
            elif self.mode == 'sum':
                pooled = tf.reduce_sum(inputs, axis=1)
            else:
                pooled = tf.reduce_max(inputs, axis=1)
            return tf.expand_dims(pooled, axis=1)

        def get_config(self):
            # Serialize constructor args, consistent with SampledSoftmaxLayer below.
            config = {'mode': self.mode, 'supports_masking': self.supports_masking}
            base_config = super(SequencePoolingLayer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))
    pooling_vec_list={}
    pooling_vec_list['hist_movie_id']=SequencePoolingLayer()(embedding_dict['hist_movie_id'])
    pooling_vec_list['hist_movie_id']
    

    3.4 sparse_embedding_list

    # Collect the user-side embedding tensors: every single-value embedding plus
    # the pooled history vector (the raw hist_movie_id sequence is replaced by
    # its pooled (batch, 1, dim) form so all entries concatenate cleanly).
    # BUG FIX: dropped the original's dead expression
    # `list(embedding_dict.values())+list(pooling_vec_list.values())`, which
    # built a throwaway list that was never used.
    sparse_embedding_list = []
    for name, tensor in embedding_dict.items():
        if name != "hist_movie_id":
            sparse_embedding_list.append(tensor)
    sparse_embedding_list.extend(pooling_vec_list.values())
    print(sparse_embedding_list)
    
    [,
     ,
     ,
     ,
     ,
     ]
    

    4 拼接和打平 Concatenate and Flatten

    # Fuse all user-side embedding vectors: concatenate on the last axis,
    # then flatten to a single (batch, total_dim) DNN input.
    concat_emb = keras.layers.Concatenate(-1)(sparse_embedding_list)
    user_dnn_input = keras.layers.Flatten()(concat_emb)
    user_dnn_input
    
    

    5 item处理

    5.1 item Input

    # One (batch, 1) integer Input per item-side sparse feature (just movie_id here).
    item_input_features={}
    item_feature_columns
    for col in item_feature_columns:
        if isinstance(col, SparseFeat):
            item_input_features[col.name] = keras.layers.Input(
                shape=(1,), dtype=col.dtype, name=col.name)
    item_inputs_list = list(item_input_features.values())
    item_inputs_list
    

    5.2 所有物品Embedding

    class EmbeddingIndex(keras.layers.Layer):
        """Emit a constant index tensor [0, ..., n-1].

        Feeding this through the shared item Embedding layer pulls out the FULL
        item-embedding matrix (used as the sampled-softmax weight matrix). The
        layer's input only anchors it in the graph; the output ignores it.
        """

        def __init__(self, index, **kwargs):
            # index: the full list of ids to look up, e.g. range(vocabulary_size)
            self.index = index
            super(EmbeddingIndex, self).__init__(**kwargs)

        def build(self, input_shape):
            super(EmbeddingIndex, self).build(
                input_shape)  # Be sure to call this somewhere!

        def call(self, x, **kwargs):
            # x is unused on purpose — the output is a constant.
            return tf.constant(self.index)

        def get_config(self):
            # FIX: serialize the constructor arg so the layer survives model
            # save/load, consistent with SampledSoftmaxLayer's get_config.
            config = {'index': self.index}
            base_config = super(EmbeddingIndex, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))
    
    item_index=EmbeddingIndex(list(range(item_vocabulary_size)))(item_input_features[item_feature_name])
    item_embedding_weight=sparse_embedding_dic[item_feature_name](item_index)
    
    item_embedding_weight
    

    6 全连接层 DNN

    全连接层为两层,隐藏层为[64,16]

    # Two-layer user tower with hidden sizes [64, 16], as stated above.
    hidden1 =keras.layers.Dense(64, activation='relu')(user_dnn_input)
    # BUG FIX: the 16-unit layer must consume hidden1, not user_dnn_input —
    # the original left the 64-unit layer dead, producing a one-layer tower.
    user_dnn_out =keras.layers.Dense(16, activation='relu')(hidden1)
    user_dnn_out
    

    7 SampledSoftmaxLayer

    由于训练的全是正样本,所以采用抽样的softmax损失函数

    7.1 SampledSoftmaxLayer 输入

    ①全量物品embedding
    ② user_dnn_out
    ③ item Input
    结构如下

    # The three inputs of SampledSoftmaxLayer:
    # (1) the full item-embedding matrix (softmax weights)
    embeddings = item_embedding_weight
    embeddings

    # (2) the user-tower output vector
    inputs = user_dnn_out
    inputs

    # (3) the positive item id, i.e. the label
    label_idx = item_input_features[item_feature_name]
    label_idx
    
    

    7.2 SampledSoftmaxLayer 层

    class SampledSoftmaxLayer(keras.layers.Layer):
        """Compute the per-example sampled-softmax loss.

        Takes [embeddings, inputs, label_idx]:
          embeddings: full item-embedding matrix (vocab, dim), the softmax weights
          inputs:     user-tower output (batch, dim)
          label_idx:  positive item id (batch, 1)
        Returns the loss with shape (batch, 1). Training on positives-only data
        is handled by sampling `num_sampled` negative classes per example.
        """
        def __init__(self, num_sampled=5, **kwargs):
            # num_sampled: number of negative classes sampled per positive example
            self.num_sampled = num_sampled
            super(SampledSoftmaxLayer, self).__init__(**kwargs)

        def build(self, input_shape):
            # First input is the embedding matrix, so its leading dim is the
            # number of classes (item vocabulary size).
            self.size = input_shape[0][0]
            # Fixed zero bias — sampled_softmax_loss requires a bias vector.
            # NOTE(review): passes the initializer CLASS, not an instance;
            # Keras accepts both, so behavior is unchanged.
            self.zero_bias = self.add_weight(shape=[self.size],
                                             initializer=keras.initializers.Zeros,
                                             dtype=tf.float32,
                                             trainable=False,
                                             name="bias")
            super(SampledSoftmaxLayer, self).build(input_shape)

        def call(self, inputs_with_label_idx, training=None, **kwargs):

            embeddings, inputs, label_idx = inputs_with_label_idx

            loss = tf.nn.sampled_softmax_loss(weights=embeddings,  # self.item_embedding.
                                              biases=self.zero_bias,
                                              labels=label_idx,
                                              inputs=inputs,
                                              num_sampled=self.num_sampled,
                                              num_classes=self.size,  # self.target_song_size
                                              )

            # (batch,) -> (batch, 1) so the loss can serve as the model output
            return tf.expand_dims(loss, axis=1)

        def compute_output_shape(self, input_shape):
            return (None, 1)

        def get_config(self, ):
            # Serialize the constructor arg so the layer survives save/load.
            config = {'num_sampled': self.num_sampled}
            base_config = super(SampledSoftmaxLayer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))
    
    # Per-example sampled-softmax loss tensor, shape (batch, 1).
    sampled_softmax = SampledSoftmaxLayer()
    output = sampled_softmax([embeddings, inputs, label_idx])
    output
    
    

    8 模型构建

    模型输入为前面的user_inputs_list 和item_inputs_list,结构如下,模型输出为上面的output.

    # Assemble the training model: every user-side Input plus the item Input,
    # with the sampled-softmax loss tensor as the model output.
    user_inputs_list = list(input_features.values())
    print(user_inputs_list)
    print(item_inputs_list)
    model = keras.models.Model(inputs=user_inputs_list + item_inputs_list,
                               outputs=output)
    
    
    	[,
    	 ,
    	 ,
    	 ,
    	 ,
    	 ]
    	 
    	[]
    
  • 相关阅读:
    PL/SQL 数组
    【译】IEEE白皮书 6G 太赫兹技术的基本原理 2023版
    聚合数据以科技赋能数字化转型,提升金融服务质效
    Uniswap 顶流之路:机制、决策与风险分析
    【新手上路常见问答】R和Python在数据科学的应用比较
    Mysql —— “子查询”内容上的打字练习
    NVIDIA Jetson Linux 35.1
    HashMap底层源码分析
    C# 入门—实现 Hello, World!
    LeetCode刷题---LRU缓存
  • 原文地址:https://blog.csdn.net/weixin_42529756/article/details/127097364