『集合知プログラミング』2章のコードをRubyで書いた

書きました。

APIを使うところは省略しています。

# Sample data set: each critic's name maps to a hash of movie title => rating.
# Key order is kept as written because hash iteration order feeds the
# calculations below.
critics = {
  'Lisa Rose' => {
    'Lady in the Water' => 2.5,
    'Snakes on a Plane' => 3.5,
    'Just My Luck' => 3.0,
    'Superman Returns' => 3.5,
    'You, Me and Dupree' => 2.5,
    'The Night Listener' => 3.0
  },
  'Gene Seymour' => {
    'Lady in the Water' => 3.0,
    'Snakes on a Plane' => 3.5,
    'Just My Luck' => 1.5,
    'Superman Returns' => 5.0,
    'The Night Listener' => 3.0,
    'You, Me and Dupree' => 3.5
  },
  'Michael Phillips' => {
    'Lady in the Water' => 2.5,
    'Snakes on a Plane' => 3.0,
    'Superman Returns' => 3.5,
    'The Night Listener' => 4.0
  },
  'Claudia Puig' => {
    'Snakes on a Plane' => 3.5,
    'Just My Luck' => 3.0,
    'The Night Listener' => 4.5,
    'Superman Returns' => 4.0,
    'You, Me and Dupree' => 2.5
  },
  'Mick LaSalle' => {
    'Lady in the Water' => 3.0,
    'Snakes on a Plane' => 4.0,
    'Just My Luck' => 2.0,
    'Superman Returns' => 3.0,
    'The Night Listener' => 3.0,
    'You, Me and Dupree' => 2.0
  },
  'Jack Matthews' => {
    'Lady in the Water' => 3.0,
    'Snakes on a Plane' => 4.0,
    'The Night Listener' => 3.0,
    'Superman Returns' => 5.0,
    'You, Me and Dupree' => 3.5
  },
  'Toby' => {
    'Snakes on a Plane' => 4.5,
    'You, Me and Dupree' => 1.0,
    'Superman Returns' => 4.0
  }
}

# Distance-based similarity between two entries of +prefs+.
#
# Only keys rated by both +person1+ and +person2+ are compared. The score is
# 1 / (1 + sum of squared rating differences): identical ratings give 1.0 and
# the score shrinks toward 0 as the ratings diverge.
#
# @param prefs [Hash] name => { item => numeric rating }
# @param person1 [Object] key into +prefs+
# @param person2 [Object] key into +prefs+
# @return [Numeric] 0 when nothing is shared, otherwise a Float in (0, 1]
def sim_distance(prefs, person1, person2)
  shared = prefs[person1].keys.select { |item| prefs[person2].key?(item) }
  return 0 if shared.empty?

  sum_of_squares = shared.inject(0) do |acc, item|
    acc + (prefs[person1][item] - prefs[person2][item])**2
  end

  # Float numerator on purpose: with Integer ratings, `1 / (1 + n)` is Integer
  # division and would truncate every score to 0 or 1.
  1.0 / (1 + sum_of_squares)
end

# Pearson correlation coefficient between two entries of +prefs+, computed
# over the keys rated by both.
#
# @param prefs [Hash] name => { item => numeric rating }
# @param person1 [Object] key into +prefs+
# @param person2 [Object] key into +prefs+
# @return [Numeric] 0 when nothing is shared or either side has no variance
#   over the shared items; otherwise a Float correlation (nominally -1..1)
def sim_piason(prefs, person1, person2)
  shared = prefs[person1].keys.select { |item| prefs[person2].key?(item) }
  return 0 if shared.empty?

  # Float count so the divisions below never truncate when every rating is an
  # Integer.
  n = shared.size.to_f

  sum1 = shared.inject(0) { |acc, item| acc + prefs[person1][item] }
  sum2 = shared.inject(0) { |acc, item| acc + prefs[person2][item] }
  p_sum = shared.inject(0) { |acc, item| acc + (prefs[person2][item] * prefs[person1][item]) }

  sum1_sq = shared.inject(0) { |acc, item| acc + prefs[person1][item]**2 }
  sum2_sq = shared.inject(0) { |acc, item| acc + prefs[person2][item]**2 }

  s_xy = p_sum - (sum1 * sum2 / n)
  s_xx = sum1_sq - (sum1**2 / n)
  s_yy = sum2_sq - (sum2**2 / n)

  # A variance term can come out as zero, or slightly negative from rounding.
  # Math.sqrt raises Math::DomainError on a negative argument, so treat both
  # cases as "no correlation".
  return 0 if s_xx <= 0 || s_yy <= 0

  denominator = Math.sqrt(s_xx * s_yy)
  return 0 if denominator.zero?

  s_xy / denominator
end

# Ranks every other key of +pref+ by its similarity to +person+.
#
# @param pref [Hash] name => { item => rating }
# @param person [Object] key to compare the others against
# @param n [Integer] maximum number of results
# @param similarity [#call] invoked as similarity.call(pref, other, person)
# @return [Array<Array>] up to +n+ [other, score] pairs, highest score first
def top_match(pref, person, n = 5, similarity = method(:sim_piason))
  candidates = pref.keys.reject { |name| name == person }
  scored = candidates.map do |name|
    [name, similarity.call(pref, name, person)]
  end

  # Compare on the score (second element) with the operands swapped, which
  # yields descending order.
  ranked = scored.sort { |left, right| right[1] <=> left[1] }
  ranked[0...n]
end

# Recommends items that +person+ has not rated, using a similarity-weighted
# average of the other entries' ratings.
#
# Entries whose similarity to +person+ is not greater than zero are ignored.
#
# @param pref [Hash] name => { item => rating }
# @param person [Object] key to produce recommendations for
# @param similarity [#call] invoked as similarity.call(pref, other, person)
# @return [Array<Array>] [predicted_rating, item] pairs, highest first
def get_recommendation(pref, person, similarity = method(:sim_piason))
  weighted_totals = Hash.new(0)
  similarity_totals = Hash.new(0)

  others = pref.keys.reject { |name| name == person }
  scored = others.map { |name| [name, similarity.call(pref, name, person)] }

  scored.each do |other, sim|
    next unless sim > 0

    pref[other].each do |item, rating|
      # Only items the target has not rated yet are candidates.
      next if pref[person].key?(item)

      weighted_totals[item] += rating * sim
      similarity_totals[item] += sim
    end
  end

  rankings = weighted_totals.map do |item, total|
    [total / similarity_totals[item], item]
  end
  rankings.sort.reverse
end

# Inverts a two-level preference hash: { person => { movie => rating } }
# becomes { movie => { person => rating } }.
#
# @param prefs [Hash] person => { movie => rating }
# @return [Hash] movie => { person => rating }
def transform_prefs(prefs)
  by_movie = {}

  prefs.each do |person, items|
    items.each do |movie, rating|
      (by_movie[movie] ||= {})[person] = rating
    end
  end

  by_movie
end

# Builds, for every item, the list of items most similar to it.
#
# The preferences are first inverted with transform_prefs so that items become
# the top-level keys, then each item is ranked against the others with
# top_match using sim_distance.
#
# @param prefs [Hash] person => { item => rating }
# @param n [Integer] maximum number of similar items kept per item
# @return [Hash] item => array of [other_item, score] pairs as returned by top_match
def calculate_similar_item(prefs, n = 10)
  item_pref = transform_prefs(prefs)
  similar = {}

  item_pref.each_key do |item|
    similar[item] = top_match(item_pref, item, n, method(:sim_distance))
  end

  similar
end

# Item-based recommendations for +user+ from a precomputed item-similarity
# table.
#
# For every item the user rated, each similar item the user has NOT rated
# accumulates similarity * rating; the predicted rating is that weighted sum
# divided by the total similarity.
#
# @param prefs [Hash] person => { item => rating }
# @param item_match [Hash] item => array of [similar_item, similarity] pairs
# @param user [Object] key into +prefs+
# @return [Array<Array>] [predicted_rating, item] pairs, highest first
def get_recommended_items(prefs, item_match, user)
  user_rating = prefs[user]
  scores = Hash.new(0)
  total_sim = Hash.new(0)

  user_rating.each do |item, rating|
    item_match[item].each do |item2, sim|
      # Skip items the user already rated (hash lookup, not an array scan).
      next if user_rating.key?(item2)

      scores[item2] += sim * rating
      total_sim[item2] += sim
    end
  end

  rankings = scores.map do |item, score|
    total = total_sim[item]
    # With zero total similarity there is nothing to base a prediction on.
    # Dividing would give NaN (or raise ZeroDivisionError for Integers), and a
    # NaN score makes the sort below raise ArgumentError.
    next if total.zero?

    [score / total, item]
  end

  rankings.compact.sort.reverse
end

# Demo entry point: runs only when this file is executed directly.
if $PROGRAM_NAME == __FILE__
  # Other calls that can be enabled for experimentation:
  #   p transform_prefs(critics)
  #   p sim_piason(critics, 'Lisa Rose', 'Gene Seymour')
  #   p sim_distance(critics, 'Lisa Rose', 'Gene Seymour')
  #   p top_match(critics, 'Toby', 3)
  #   p get_recommendation(critics, 'Toby')

  similar_items = calculate_similar_item(critics)
  recommendations = get_recommended_items(critics, similar_items, 'Toby')
  p recommendations
end