Skip to content

Commit 0fdd104

Browse files
authored
Merge pull request #18 from AKSW/entity2rec
Entity2rec
2 parents 4af1e3f + 9d135f9 commit 0fdd104

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+3916
-9
lines changed

config_files/test_entity2rec.yml

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
experiment:
2+
dataset:
3+
name: ml-100k
4+
item: # infos related to item dataset (mandatory, at least item_id)
5+
path: datasets/ml-100k/processed/item.csv
6+
extra_features: [movie_year, movie_title] # features(columns) beside item_id to be used
7+
user: # mandatory (at least user_id)
8+
path: datasets/ml-100k/processed/user.csv
9+
extra_features: [gender, occupation] # features beside user_id
10+
ratings: # mandatory (at least [user_id, item_id, rating])
11+
path: datasets/ml-100k/processed/rating.csv
12+
timestamp: True
13+
enrich:
14+
map_path: datasets/ml-100k/processed/map.csv
15+
enrich_path: datasets/ml-100k/processed/enriched.csv
16+
remove_unmatched: False
17+
properties: [subject, director]
18+
19+
preprocess:
20+
# - method: filter_by_rating
21+
# parameters:
22+
# threshold: 20
23+
# - method: binarize
24+
# parameters:
25+
# threshold: 4
26+
- method: filter_kcore
27+
parameters:
28+
k: 20
29+
iterations: 1
30+
target: user # user or rating
31+
32+
split:
33+
seed: 42
34+
# test:
35+
# method: random_by_ratio
36+
# level: global
37+
# p: 0.2
38+
# validation:
39+
# method: random_by_ratio
40+
# level: global
41+
# p: 0.2
42+
43+
# test:
44+
# method: timestamp_by_ratio
45+
# level: user
46+
# p: 0.1
47+
# validation:
48+
# level: user
49+
# method: timestamp_by_ratio
50+
# p: 0.2
51+
52+
# test:
53+
# method: fixed_timestamp
54+
# # type: global_level
55+
# timestamp: 890000000
56+
# validation:
57+
# method: fixed_timestamp
58+
# timestamp: 880000000
59+
60+
test:
61+
method: k_fold
62+
k: 5
63+
level: "user"
64+
65+
models:
66+
- name: entity2rec
67+
config:
68+
save_weights: True
69+
parameters:
70+
embedding_model: deepwalk_based
71+
embedding_model_kwargs:
72+
config:
73+
save_weights: True
74+
parameters:
75+
walk_len: 10
76+
p: 1.0
77+
q: 1.0
78+
n_walks: 50
79+
embedding_size: 64
80+
epochs: 1
81+
workers: 32
82+
frac_negative_candidates: 0.1
83+
seed: 42
84+
85+
evaluation:
86+
k: 5
87+
relevance_threshold: 0
88+
metrics: [MAP, nDCG]
89+
90+
report:
91+
file: "experiment_results/ml100k_enriched/e2rec_ratings_negative0.1.csv"

docs/source/getting_started/support.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,20 @@ Currently the supported Recommender System models are:
109109
- `iterations`: the number of iterations for the regularization propagation.
110110
- `mi`: the mi factor number that dictates how much of the start embedding will affect the final embedding, values fluctuate between `0` and `1`.
111111

112+
### entity2rec
113+
- Entity2Rec recommendation model based on Node2Vec.
114+
- Reference: Palumbo, Enrico, Giuseppe Rizzo, and Raphaël Troncy. 2017. Entity2rec: Learning user-item relatedness from knowledge graphs for top-n item recommendation. Proceedings of the eleventh ACM conference on recommender systems. 32-36.
115+
- Main parameters
116+
- `embedding_model`: the name of a previously implemented graph embedding model.
117+
- `embedding_model_kwargs`: arguments for the embedding model.
118+
- `collab_only`: using only collaborative filtering properties' embeddings for the recommendations.
119+
- `content_only`: using only item content properties' embeddings for the recommendations.
120+
- `social_only`: using only user social interaction properties' embeddings for the recommendations.
121+
- `workers`: the number of threads to be used in creating candidates for recommendations. `-1` automatically uses the number of cores as the number of workers. Using the number of physical cores is recommended if the computer needs to remain usable for other tasks.
122+
- `frac_negative_candidates`: the fraction of a user's unrated items to be used in the training data. Values between `0` and `1`, with `0.1` recommended.
123+
- `seed`: seed for fixing the sampling of negative and positive examples for training.
124+
- `relevance`: the relevance that an evaluation from a user must have to be counted as a recommendation.
125+
112126

113127
## Pre-processing Methods
114128

docs/source/imgs/framework.svg

Lines changed: 1 addition & 1 deletion
Loading

requirements_framework.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,6 @@ py-cpuinfo
1919
gputil
2020
psutil
2121
sentence-transformers
22-
graph-walker @ git+https://github.com/AlvaroJoseLopes/graph-walker
22+
graph-walker @ git+https://github.com/AlvaroJoseLopes/graph-walker
23+
SPARQLWrapper
24+
multiprocess

src/framework/dataloader/graph/graph.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def get_ratings_with_labels(self):
7676

7777
return ratings
7878

79-
def get_all_triples(self):
79+
def get_all_triples(self, return_type='str'):
8080
triples_return = {"head": [], "relation": [], "tail": []}
8181

8282
# ratings triples
@@ -86,25 +86,31 @@ def get_all_triples(self):
8686
for user, ratings in tqdm(ratings.items(), total=n_total, desc=desc):
8787
ratings.sort(key=lambda x: x[1], reverse=True)
8888
for rating in ratings:
89-
triples_return["head"].append(user.__str__())
89+
if return_type == "str": triples_return["head"].append(user.__str__())
90+
else: triples_return["head"].append(user)
9091
triples_return["relation"].append(f"rating{rating[1]}")
91-
triples_return["tail"].append(rating[0].__str__())
92+
if return_type == "str": triples_return["tail"].append(rating[0].__str__())
93+
else: triples_return["tail"].append(rating[0])
9294

9395
# user property triples
9496
user_properties = self.get_user_property_edges()
9597
desc = f"Generating user properties triples"
9698
for user, user_property in tqdm(user_properties, desc=desc):
97-
triples_return["head"].append(user.__str__())
99+
if return_type == "str": triples_return["head"].append(user.__str__())
100+
else: triples_return["head"].append(user)
98101
triples_return["relation"].append("is")
99-
triples_return["tail"].append(user_property.__str__())
102+
if return_type == "str": triples_return["tail"].append(user_property.__str__())
103+
else: triples_return["tail"].append(user_property)
100104

101105
# item property triples
102106
item_properties = self.get_item_property_edges()
103107
desc = f"Generating item properties triples"
104108
for item, item_property in tqdm(item_properties, desc=desc):
105-
triples_return["head"].append(item.__str__())
109+
if return_type == "str": triples_return["head"].append(item.__str__())
110+
else: triples_return["head"].append(item)
106111
triples_return["relation"].append("has")
107-
triples_return["tail"].append(item_property.__str__())
112+
if return_type == "str": triples_return["tail"].append(item_property.__str__())
113+
else: triples_return["tail"].append(item_property)
108114

109115
return pd.DataFrame(triples_return)
110116

src/framework/recommender/model2class.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,9 @@
4242
'ePHEN': {
4343
'submodule': 'ePHEN.model',
4444
'class': 'EPHEN'
45+
},
46+
'entity2rec': {
47+
'submodule': 'entity2rec.model',
48+
'class': 'Entity2Rec'
4549
}
4650
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# based on: https://github.com/D2KLab/entity2rec

0 commit comments

Comments
 (0)