『集合知プログラミング』2章をRubyで書いた
『集合知プログラミング』2章のコードをRubyで書きました。
APIを使うところは省略しています。
# Sample preference data: critic name => { movie title => rating }.
critics = {
  'Lisa Rose' => {
    'Lady in the Water' => 2.5,
    'Snakes on a Plane' => 3.5,
    'Just My Luck' => 3.0,
    'Superman Returns' => 3.5,
    'You, Me and Dupree' => 2.5,
    'The Night Listener' => 3.0
  },
  'Gene Seymour' => {
    'Lady in the Water' => 3.0,
    'Snakes on a Plane' => 3.5,
    'Just My Luck' => 1.5,
    'Superman Returns' => 5.0,
    'The Night Listener' => 3.0,
    'You, Me and Dupree' => 3.5
  },
  'Michael Phillips' => {
    'Lady in the Water' => 2.5,
    'Snakes on a Plane' => 3.0,
    'Superman Returns' => 3.5,
    'The Night Listener' => 4.0
  },
  'Claudia Puig' => {
    'Snakes on a Plane' => 3.5,
    'Just My Luck' => 3.0,
    'The Night Listener' => 4.5,
    'Superman Returns' => 4.0,
    'You, Me and Dupree' => 2.5
  },
  'Mick LaSalle' => {
    'Lady in the Water' => 3.0,
    'Snakes on a Plane' => 4.0,
    'Just My Luck' => 2.0,
    'Superman Returns' => 3.0,
    'The Night Listener' => 3.0,
    'You, Me and Dupree' => 2.0
  },
  'Jack Matthews' => {
    'Lady in the Water' => 3.0,
    'Snakes on a Plane' => 4.0,
    'The Night Listener' => 3.0,
    'Superman Returns' => 5.0,
    'You, Me and Dupree' => 3.5
  },
  'Toby' => {
    'Snakes on a Plane' => 4.5,
    'You, Me and Dupree' => 1.0,
    'Superman Returns' => 4.0
  }
}

# Keys rated under both prefs[person1] and prefs[person2], in person1's order.
def shared_items(prefs, person1, person2)
  prefs[person1].keys.select { |item| prefs[person2].key?(item) }
end

# Distance-based similarity between two entries of +prefs+.
#
# @param prefs [Hash] outer key => { inner key => numeric rating }
# @param person1 [Object] first outer key
# @param person2 [Object] second outer key
# @return [Numeric] 1 / (1 + sum of squared rating differences) over the
#   shared inner keys, or 0 when nothing is shared
def sim_distance(prefs, person1, person2)
  shared = shared_items(prefs, person1, person2)
  return 0 if shared.empty?

  # Summing from 0.0 keeps the final division in floating point even when
  # every rating is an Integer (1 / (1 + 5) would otherwise be 0).
  sum_of_squares = shared.sum(0.0) do |item|
    (prefs[person1][item] - prefs[person2][item])**2
  end
  1 / (1 + sum_of_squares)
end

# Pearson correlation between two entries of +prefs+, computed over the
# inner keys they share. (The method name keeps its original spelling so
# existing callers continue to work.)
#
# @param prefs [Hash] outer key => { inner key => numeric rating }
# @param person1 [Object] first outer key
# @param person2 [Object] second outer key
# @return [Numeric] correlation coefficient, or 0 when nothing is shared or
#   either side has no variance
def sim_piason(prefs, person1, person2)
  shared = shared_items(prefs, person1, person2)
  n = shared.size
  return 0 if n.zero?

  # Convert to Float up front so Integer ratings are not truncated by the
  # divisions below.
  xs = shared.map { |item| prefs[person1][item].to_f }
  ys = shared.map { |item| prefs[person2][item].to_f }
  sum_x = xs.sum
  sum_y = ys.sum

  s_xy = xs.zip(ys).sum { |x, y| x * y } - (sum_x * sum_y / n)
  s_xx = xs.sum { |x| x**2 } - (sum_x**2 / n)
  s_yy = ys.sum { |y| y**2 } - (sum_y**2 / n)

  # Rounding can push the product slightly below zero; checking the sign
  # before taking the root avoids Math::DomainError in that case.
  denominator_sq = s_xx * s_yy
  return 0 unless denominator_sq.positive?

  s_xy / Math.sqrt(denominator_sq)
end

# The +n+ entries of +pref+ most similar to +person+.
#
# @param pref [Hash] outer key => { inner key => numeric rating }
# @param person [Object] outer key to compare the others against
# @param n [Integer] maximum number of matches to return
# @param similarity [#call] called as similarity.call(pref, other, person)
# @return [Array<Array>] [other, score] pairs, highest score first
def top_match(pref, person, n = 5, similarity = method(:sim_piason))
  pref.keys
      .reject { |other| other == person }
      .map { |other| [other, similarity.call(pref, other, person)] }
      .sort { |(_, score_a), (_, score_b)| score_b <=> score_a }[0...n]
end

# Predicted ratings for the items +person+ has not rated, weighting every
# other entry's rating by its similarity to +person+. Entries whose
# similarity is not positive are ignored.
#
# @param pref [Hash] outer key => { inner key => numeric rating }
# @param person [Object] outer key to recommend for
# @param similarity [#call] called as similarity.call(pref, other, person)
# @return [Array<Array>] [predicted rating, item] pairs, highest first
def get_recommendation(pref, person, similarity = method(:sim_piason))
  totals = Hash.new(0)
  sim_sums = Hash.new(0)

  pref.each_key do |other|
    next if other == person

    sim = similarity.call(pref, other, person)
    next unless sim.positive?

    pref[other].each do |item, rating|
      next if pref[person].key?(item)

      totals[item] += rating * sim
      sim_sums[item] += sim
    end
  end

  totals.map { |item, total| [total / sim_sums[item], item] }.sort.reverse
end

# Swaps the two key levels of +prefs+:
# { a => { x => v } } becomes { x => { a => v } }.
#
# @param prefs [Hash] outer key => { inner key => value }
# @return [Hash] inner key => { outer key => value }
def transform_prefs(prefs)
  prefs.each_with_object({}) do |(person, items), flipped|
    items.each do |item, rating|
      (flipped[item] ||= {})[person] = rating
    end
  end
end

# For every item (inner key of +prefs+), the +n+ most similar other items
# according to sim_distance.
#
# @param prefs [Hash] outer key => { item => numeric rating }
# @param n [Integer] maximum number of neighbours kept per item
# @return [Hash] item => array of [other item, similarity] pairs
def calculate_similar_item(prefs, n = 10)
  item_prefs = transform_prefs(prefs)
  distance = method(:sim_distance)
  item_prefs.keys.each_with_object({}) do |item, similar|
    similar[item] = top_match(item_prefs, item, n, distance)
  end
end

# Item-based recommendations for +user+ from a precomputed similarity table.
#
# @param prefs [Hash] user => { item => numeric rating }
# @param item_match [Hash] item => array of [other item, similarity] pairs
#   (the shape returned by calculate_similar_item)
# @param user [Object] key of +prefs+ to recommend for
# @return [Array<Array>] [predicted rating, item] pairs, highest first
def get_recommended_items(prefs, item_match, user)
  user_ratings = prefs[user]
  scores = Hash.new(0)
  total_sim = Hash.new(0)

  user_ratings.each do |item, rating|
    # A rated item with no entry in item_match simply contributes nothing.
    item_match.fetch(item, []).each do |other_item, sim|
      next if user_ratings.key?(other_item)

      scores[other_item] += sim * rating
      total_sim[other_item] += sim
    end
  end

  # Candidates whose similarities sum to zero have no defined weighted
  # average; dividing would give NaN (which breaks the sort) or raise
  # ZeroDivisionError, so they are left out.
  ranked = scores.reject { |item, _| total_sim[item].zero? }
                 .map { |item, score| [score / total_sim[item], item] }
  ranked.sort.reverse
end

if __FILE__ == $PROGRAM_NAME
  # p transform_prefs(critics)
  # p sim_piason(critics, 'Lisa Rose', 'Gene Seymour')
  # p sim_distance(critics, 'Lisa Rose', 'Gene Seymour')
  # p top_match(critics, 'Toby', 3)
  # p get_recommendation(critics, 'Toby')
  item_sim = calculate_similar_item(critics)
  p get_recommended_items(critics, item_sim, 'Toby')
end