import math

import numpy as np
import torch
from torch import nn
from tensorboardX import SummaryWriter
class NeuMF(nn.Module):
    """Neural Matrix Factorization: a GMF branch and an MLP branch fused by a final linear layer."""

    def __init__(self, num_users, num_items, latent_dim_mf, latent_dim_mlp, config_layers, learning_rate):
        super(NeuMF, self).__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.latent_dim_mf = latent_dim_mf
        self.latent_dim_mlp = latent_dim_mlp
        self.config_layers = config_layers
        self.learning_rate = learning_rate
        self._writer = SummaryWriter(log_dir='logs')

        # Separate embedding tables for the MLP and MF (GMF) branches.
        self.embedding_user_mlp = nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.latent_dim_mlp)
        self.embedding_item_mlp = nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.latent_dim_mlp)
        self.embedding_user_mf = nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.latent_dim_mf)
        self.embedding_item_mf = nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.latent_dim_mf)

        # MLP tower: the input is the concatenation of the user and item MLP embeddings.
        self.fc_layers = nn.ModuleList()
        input_size = self.latent_dim_mlp * 2
        for output_size in self.config_layers:
            self.fc_layers.append(nn.Linear(input_size, output_size))
            input_size = output_size

        # The final layer scores the concatenated [MLP output | GMF vector].
        self.output_layer = nn.Linear(self.config_layers[-1] + self.latent_dim_mf, 1)

        self.loss = nn.BCELoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        self._metrics = Metrics(top_k=10)
        self._init_weight_()
    def _init_weight_(self):
        nn.init.normal_(self.embedding_user_mlp.weight, std=0.01)
        nn.init.normal_(self.embedding_item_mlp.weight, std=0.01)
        nn.init.normal_(self.embedding_user_mf.weight, std=0.01)
        nn.init.normal_(self.embedding_item_mf.weight, std=0.01)

        for fc in self.fc_layers:
            nn.init.xavier_uniform_(fc.weight)
            nn.init.zeros_(fc.bias)

        nn.init.kaiming_uniform_(self.output_layer.weight, nonlinearity='sigmoid')
        nn.init.zeros_(self.output_layer.bias)
    def forward(self, user, item):
        # MLP branch: concatenate user/item embeddings and pass them through the tower.
        user_mlp = self.embedding_user_mlp(user)
        item_mlp = self.embedding_item_mlp(item)
        mlp_vector = torch.cat([user_mlp, item_mlp], dim=-1)

        for fc in self.fc_layers:
            mlp_vector = torch.relu(fc(mlp_vector))

        # GMF branch: element-wise product of user/item embeddings.
        user_mf = self.embedding_user_mf(user)
        item_mf = self.embedding_item_mf(item)
        mf_vector = torch.mul(user_mf, item_mf)

        output_vector = torch.cat([mlp_vector, mf_vector], dim=-1)

        # The sigmoid maps the logit to an interaction probability in (0, 1),
        # so the returned value is a rating, not a logit.
        rating = torch.sigmoid(self.output_layer(output_vector))
        return rating
    def train_single_batch(self, users, items, ratings):
        if torch.cuda.is_available():
            users, items, ratings = users.cuda(), items.cuda(), ratings.cuda()

        self.train()
        self.optimizer.zero_grad()
        ratings_pred = self.forward(users, items)
        # BCELoss expects float targets shaped like the flattened predictions.
        loss = self.loss(ratings_pred.view(-1), ratings.float())
        loss.backward()
        self.optimizer.step()
        return loss.item()
    def train_single_epoch(self, train_loader, epoch_id):
        self.train()
        total_loss = 0
        for batch_id, (users, items, ratings) in enumerate(train_loader):
            loss = self.train_single_batch(users, items, ratings)
            print('Epoch {} Batch {} Loss {}'.format(epoch_id, batch_id, loss))
            total_loss += loss
        self._writer.add_scalar('model/loss', total_loss, epoch_id)
        return total_loss
    def evaluate(self, evaluate_data, epoch_id=0):
        self.eval()
        test_users, test_items, negative_users, negative_items = evaluate_data
        if torch.cuda.is_available():
            test_users, test_items = test_users.cuda(), test_items.cuda()
            negative_users, negative_items = negative_users.cuda(), negative_items.cuda()

        # No gradients are needed when scoring candidates for evaluation.
        with torch.no_grad():
            test_scores = self.forward(test_users, test_items)
            negative_scores = self.forward(negative_users, negative_items)

        self._metrics.set_subjects(
            test_users=test_users.cpu(),
            test_items=test_items.cpu(),
            test_scores=test_scores.cpu(),
            negative_users=negative_users.cpu(),
            negative_items=negative_items.cpu(),
            negative_scores=negative_scores.cpu(),
        )

        hit_ratio = self._metrics.cal_hit_ratio()
        ndcg = self._metrics.cal_ndcg()

        # Log against the epoch id rather than a fixed step.
        self._writer.add_scalar('model/hit_ratio', hit_ratio, epoch_id)
        self._writer.add_scalar('model/ndcg', ndcg, epoch_id)
        print('Hit Ratio is {:.6f}, NDCG is {:.6f}'.format(hit_ratio, ndcg))
        return hit_ratio, ndcg
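# A minimal sketch of the evaluation-data layout this module appears to assume
# (illustrative values only, not part of the model): `evaluate_data` is a 4-tuple of
# 1-D LongTensors in which the test pair at index i is aligned with its 99 sampled
# negatives at positions i*99 .. (i+1)*99 of the negative tensors, e.g.
#
#     test_users     = torch.LongTensor([0, 1, 2, ...])            # one row per user
#     test_items     = torch.LongTensor([42, 7, 13, ...])          # held-out positive item
#     negative_users = torch.LongTensor([0] * 99 + [1] * 99 + ...)  # each user id repeated 99x
#     negative_items = torch.LongTensor([...])                      # 99 sampled items per user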
class Metrics(object):
    """Leave-one-out ranking metrics: each test item is ranked against its 99 sampled negatives."""

    def __init__(self, top_k):
        self._top_k = top_k

    def set_subjects(self, test_users, test_items, test_scores, negative_users, negative_items, negative_scores):
        self._subjects = []
        for i in range(len(test_users)):
            user = test_users[i].item()
            test_item = test_items[i].item()
            test_score = test_scores[i].view(-1).item()
            # Negatives for user i occupy the contiguous slice [i*99, (i+1)*99).
            neg_items = negative_items[i * 99:(i + 1) * 99].detach().numpy()
            neg_scores = negative_scores[i * 99:(i + 1) * 99].view(-1).detach().numpy()

            # Rank the positive together with its negatives by descending score.
            items = np.concatenate(([test_item], neg_items))
            scores = np.concatenate(([test_score], neg_scores))

            ranked_idx = np.argsort(-scores)
            ranked_items = items[ranked_idx]
            self._subjects.append({
                'user': user,
                'ranked_items': ranked_items,
                'test_item': test_item,
            })
    def cal_hit_ratio(self):
        # HR@K: fraction of users whose held-out item appears in the top-K ranking.
        hits = 0
        for subject in self._subjects:
            if subject['test_item'] in subject['ranked_items'][:self._top_k]:
                hits += 1
        return hits / len(self._subjects)
    def cal_ndcg(self):
        # NDCG@K: a hit at 1-based rank r within the top-K contributes
        # log(2) / log(1 + r); a miss outside the top-K contributes 0.
        total_ndcg = 0
        for subject in self._subjects:
            top_items = subject['ranked_items'][:self._top_k]
            hit_positions = np.where(top_items == subject['test_item'])[0]
            if len(hit_positions) > 0:
                rank = hit_positions[0] + 1
                total_ndcg += math.log(2) / math.log(1 + rank)
        return total_ndcg / len(self._subjects)
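# A minimal smoke-test sketch, not part of the original module: the hyperparameters
# and synthetic tensors below are made-up assumptions chosen only to exercise the
# training and evaluation loops end to end.
if __name__ == '__main__':
    from torch.utils.data import DataLoader, TensorDataset

    num_users, num_items = 100, 500
    model = NeuMF(num_users=num_users, num_items=num_items,
                  latent_dim_mf=8, latent_dim_mlp=8,
                  config_layers=[16, 8], learning_rate=1e-3)

    # Random implicit-feedback triples (user, item, 0/1 label).
    users = torch.randint(0, num_users, (1024,))
    items = torch.randint(0, num_items, (1024,))
    ratings = torch.randint(0, 2, (1024,)).float()
    loader = DataLoader(TensorDataset(users, items, ratings), batch_size=256, shuffle=True)

    model.train_single_epoch(loader, epoch_id=0)

    # Leave-one-out evaluation data: one positive plus 99 sampled negatives per user,
    # laid out in the contiguous order that Metrics.set_subjects expects.
    test_users = torch.arange(num_users)
    test_items = torch.randint(0, num_items, (num_users,))
    negative_users = test_users.repeat_interleave(99)
    negative_items = torch.randint(0, num_items, (num_users * 99,))
    model.evaluate((test_users, test_items, negative_users, negative_items), epoch_id=0)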