#概要
Neo4j の Sandbox を試してみた。
MovieLens のデータセットを使ってみた。
映画のレコメンドをしてみた。
#コサイン類似度で映画をお勧め。
// Recommend movies to the user with id 21 using user-to-user cosine similarity.
//
// Step 1: pair the target user (p1) with every other user (p2) through the
// movies both of them rated, and compute the cosine similarity of their
// rating vectors over those shared movies.
MATCH (p1:User {id: 21})-[x:RATING]->(m:Movie)<-[y:RATING]-(p2:User)
WITH p1,
     p2,
     COUNT(m) AS numbermovies,
     SUM(x.rating * y.rating) AS xyDotProduct,
     SQRT(REDUCE(xDot = 0.0, a IN COLLECT(x.rating) | xDot + a ^ 2)) AS xLength,
     SQRT(REDUCE(yDot = 0.0, b IN COLLECT(y.rating) | yDot + b ^ 2)) AS yLength
// Keep only neighbours sharing more than 10 rated movies with p1, and skip
// pairs where either rating vector has zero length (cosine similarity is
// undefined there and the division below would not yield a usable number).
WHERE numbermovies > 10
  AND xLength > 0
  AND yLength > 0
// Step 2: keep the 10 most similar users.
WITH p1,
     p2,
     xyDotProduct / (xLength * yLength) AS sim
ORDER BY sim DESC
LIMIT 10
// Step 3: collect movies rated by those neighbours that p1 has not rated.
// The pattern predicate replaces the deprecated exists(<pattern>) function form.
MATCH (p2)-[r:RATING]->(m:Movie)
WHERE NOT (p1)-[:RATING]->(m)
// Step 4: score each candidate movie by the similarity-weighted sum of the
// neighbours' ratings. p2 must NOT appear among the returned (grouping)
// expressions: otherwise every row is a single (neighbour, movie) pair and
// the SUM never accumulates across neighbours.
RETURN p1.name,
       m.title,
       SUM(sim * r.rating) AS score
ORDER BY score DESC
LIMIT 5
#python実行結果
title score
Shawshank Redemption, The (1994) 29.503255423721242
WALL·E (2008) 26.663597071412074
Inglourious Basterds (2009) 25.151535146327763
Whiplash (2014) 17.84941121812618
Shutter Island (2010) 16.417668233642118
以上。