This project is a recommendation-system toolkit that includes rank & match models and metrics, all implemented in TensorFlow 2.x.
You can use these models with model.fit() and model.predict() through tf.keras.Model.
The implementation for TensorFlow 1.x is in this GitHub repository.
To install, simply use pip to pull down from PyPI.
pip install deep-rec-kit
If you want to use the latest features, or develop new features, you can also build it from source.
git clone https://github.com/QunBB/RecSys
cd RecSys
pip install -e .
`......` means that the list will be continuously updated.
| model | paper | blog | implemented |
|---|---|---|---|
| ...... | | | |
| STEM | [KDD 2024] Ads Recommendation in a Collapsed and Entangled World | zhihu | ✅ |
| PEPNet | [KDD 2023] PEPNet: Parameter and Embedding Personalized Network for Infusing with Personalized Prior Information | zhihu | ✅ |
| M2M | [CIKM 2022] Leaving No One Behind: A Multi-Scenario Multi-Task Meta Learning Approach for Advertiser Modeling | zhihu | ✅ |
| SAR-Net | [CIKM 2021] SAR-Net: A Scenario-Aware Ranking Network for Personalized Fair Recommendation in Hundreds of Travel Scenarios | zhihu | |
| Star | [CIKM 2021] One Model to Serve All: Star Topology Adaptive Recommender for Multi-Domain CTR Prediction | zhihu | ✅ |
| PLE | [RecSys 2020] Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations | zhihu | ✅ |
| MMoE | [KDD 2018] Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts | zhihu | ✅ |
| model | paper | blog | implemented |
|---|---|---|---|
| ...... | | | |
| Dual Augmented Two-tower Model | [DLP-KDD 2021] A Dual Augmented Two-tower Model for Online Large-scale Recommendation | zhihu | |
| ComiRec | [KDD 2020] Controllable Multi-Interest Framework for Recommendation | zhihu | |
| MIND | [CIKM 2019] Multi-Interest Network with Dynamic Routing for Recommendation at Tmall | zhihu | |
| YouTube DNN | [RecSys 2016] Deep Neural Networks for YouTube Recommendations | zhihu | |
Metrics for recommendation systems.
Coming soon.
import numpy as np
import tensorflow as tf
from recsys.feature import Field, Task
from recsys.multidomain.pepnet import pepnet
# Prediction targets of the multi-task model; each task name is also used as
# the key of that task's label array in the example dataset.
task_list = [Task(name=task_name) for task_name in ('click', 'like', 'fav')]

# Number of distinct domains; used as the vocabulary size of `domain_id`.
num_domain = 3
def create_model():
    """Build the PEPNet example model for the tasks in ``task_list``.

    The input schema has a user id, an item id, a length-20 item history,
    a context id and two domain-side fields. A summary of the resulting
    model is printed before it is returned.

    Returns:
        The model object returned by ``pepnet``.
    """
    fields = [
        Field('uid', vocabulary_size=100),
        Field('item_id', vocabulary_size=20, belong='item'),
        # NOTE(review): emb='item_id' presumably makes the history share the
        # embedding table of `item_id` -- confirm against recsys.feature.Field.
        Field('his_item_id', vocabulary_size=20, emb='item_id', length=20, belong='history'),
        Field('context_id', vocabulary_size=20, belong='context'),
        # domain's fields
        Field('domain_id', vocabulary_size=num_domain, belong='domain'),
        Field('domain_impression', vocabulary_size=1, belong='domain', dtype="float32"),
    ]

    # Alternative history aggregation:
    #   history_agg='transformer',
    #   agg_kwargs={'num_layers': 1, 'd_model': 4, 'num_heads': 2, 'dff': 64}
    # NOTE(review): [64, 32] is presumably the hidden layer sizes -- confirm
    # against the pepnet signature.
    model = pepnet(
        fields,
        task_list,
        [64, 32],
        history_agg='attention',
        agg_kwargs={},
    )

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit an extra "None" line.
    model.summary()
    return model
def create_dataset():
    """Generate a reproducible random toy dataset for the example model.

    Returns:
        A ``(data, labels)`` tuple. ``data`` maps each feature name to a
        NumPy array with 2000 rows (``his_item_id`` has 20 columns);
        ``labels`` maps each task name in ``task_list`` to an array of 2000
        random integers drawn from {0, 1}.
    """
    n_samples = 2000
    # Seed the global NumPy generator so every run draws the same values.
    np.random.seed(2024)

    # The draws below must stay in this order: they all consume the same
    # seeded global generator, so reordering would change the data.
    data = dict(
        uid=np.random.randint(0, 100, (n_samples,)),
        item_id=np.random.randint(0, 20, (n_samples,)),
        his_item_id=np.random.randint(0, 20, (n_samples, 20)),
        context_id=np.random.randint(0, 20, (n_samples,)),
        domain_id=np.random.randint(0, num_domain, (n_samples,)),
        domain_impression=np.random.random((n_samples,)),
    )

    # One independent binary label vector per task, keyed by task name.
    labels = {}
    for task in task_list:
        labels[task.name] = np.random.randint(0, 2, (n_samples,))

    return data, labels
if __name__ == '__main__':
model = create_model()
data, labels = create_dataset()
model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
model.fit(data, labels, batch_size=32, epochs=10)
Layers whose names have the prefix "dnn" will use the Adam optimizer, and those with the prefix "embedding" will use Adagrad. You must also provide a "default" optimizer for the remaining (legacy) layers.
import tensorflow as tf
from recsys.feature import Field, Task
from recsys.multidomain.pepnet import pepnet
# Prediction targets of the multi-task model, one Task per target name.
task_list = [Task(name=task_name) for task_name in ('click', 'like', 'fav')]

# Number of distinct domains in the example.
num_domain = 3
def create_model():
# exactly the same as above ......
def create_dataset():
# exactly the same as above ......
def train(data, labels):
    """Train the example model with per-prefix optimizers and checkpoint it.

    After training, the model is saved under ``./pepnet-saved`` and two
    diagnostics are printed: the model output for the first 10 samples and
    the variables of the 'embedding' optimizer.

    Args:
        data: Mapping from feature name to an array of samples.
        labels: Training targets passed to ``model.fit``.
    """
    net = create_model()

    # One optimizer per layer-name prefix; 'default' is used for the rest.
    optimizers_by_prefix = {'dnn': 'adam', 'embedding': 'Adagrad', 'default': 'adam'}
    net.compile(
        optimizer=optimizers_by_prefix,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    net.fit(data, labels, batch_size=32, epochs=10)

    # Checkpoint.save() appends a save counter to the prefix, which is why
    # restore() reads './pepnet-saved/model.ckpt-1'.
    ckpt = tf.train.Checkpoint(model=net)
    ckpt.save('./pepnet-saved/model.ckpt')

    # Reference output to compare against the restored model.
    first_rows = {name: values[:10] for name, values in data.items()}
    print(net(first_rows))
    print(net.optimizer['embedding'].variables())
def restore(data):
    """Rebuild the example model and restore it from the saved checkpoint.

    Prints the restored model's output for the first 10 samples and then,
    after building every optimizer, the variables of the 'embedding'
    optimizer.

    Args:
        data: Mapping from feature name to an array of samples.
    """
    net = create_model()

    # Compile with the same per-prefix optimizer mapping used for training.
    optimizers_by_prefix = {'dnn': 'adam', 'embedding': 'Adagrad', 'default': 'adam'}
    net.compile(
        optimizer=optimizers_by_prefix,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )

    ckpt = tf.train.Checkpoint(model=net)
    ckpt.restore('./pepnet-saved/model.ckpt-1')

    first_rows = {name: values[:10] for name, values in data.items()}
    print(net(first_rows))

    # Build each optimizer on the variables of its layer group.
    # NOTE(review): presumably this creates the optimizer slot variables so
    # the checkpointed optimizer state can be loaded into them -- confirm.
    for prefix in net.optimizer:
        net.optimizer[prefix].build(net.special_layer_variables[prefix])

    print(net.optimizer['embedding'].variables())
if __name__ == '__main__':
    # Train and checkpoint first, then rebuild a fresh model and restore it
    # from that checkpoint using the same feature data.
    data, labels = create_dataset()
    train(data, labels)
    restore(data)