经典演算法题每日演练——第六题协同推荐SlopeOne 演算法

相信大家对如下的 Category 都很熟悉，很多网站都有类似如下的功能，“商品推荐”,” 猜你喜欢 “，在实体店中我们有导购来为我们服务，在互联网上
我们需要同样的一种替代物，如果简简单单的在资料库里面去捞，去比较，几乎是完成不了的, 这时我们就需要一种协同推荐演算法，来高效的推荐浏览者喜
欢的商品。

一：概念
SlopeOne 的思想很简单，就是用均值化的思想来掩盖个体的打分差异，举个例子说明一下：

在这个图中，系统该如何计算 “王五 “对” 电冰箱 “的打分值呢？刚才我们也说了，slopeone 是采用均值化的思想, 也就是：R 王五 =4-{[(5-10)+(4-5)]/2}=7 。
下面我们看看多于两项的商品，如何计算打分值。
rb = (n * (ra – R(A->B)) + m * (rc – R(C->B)))/(m+n)
注意： a,b,c 代表 “商品” 。
ra 代表 “商品的打分值” 。
ra->b 代表 “A 组到 B 组的平均差（均值化）” 。
m,n 代表人数。

根据公式，我们来算一下。
r王五 = (2 * (4 – R(洗衣机->彩电)) + 2 * (10 – R(电冰箱->彩电))+ 2 * (5 – R(空调->彩电)))/(2+2+2)=6.8
是的，slopeOne 就是这么简单，实战效果非常不错。

二：实现
1：定义一个评分类 Rating 。

1 ///

2 /// 评分实体类
3 ///

4 public class Rating
5 {
6 ///

7 /// 记录差值
8 ///

9 public float Value { get; set; }
10
11 ///

12 /// 记录评分人数，方便公式中的 m 和 n 的值
13 ///

14 public int Freq { get; set; }
15
16 ///

17 /// 记录打分使用者的 ID
18 ///

19 public HashSet hash_user = new HashSet();
20
21 ///

22 /// 平均值
23 ///

24 public float AverageValue
25 {
26 get { return Value / Freq; }
27 }
28 }

2：定义一个产品类

1 ///

2 /// 产品类
3 ///

4 public class Product
5 {
6 public int ProductID { get; set; }
7
8 public string ProductName { get; set; }
9
10 ///

11 /// 对产品的打分
12 ///

13 public float Score { get; set; }
14 }

3：SlopeOne 类
参考了互联网上的例子，将二维矩阵做成线性表，有效的降低了空间复杂度。

1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5
6 namespace SupportCenter.Test
7 {
8 #region Slope One 演算法
9 ///

10 /// Slope One 演算法
11 ///

12 public class SlopeOne
13 {
14 ///

15 /// 评分系统
16 ///

17 public static Dictionary dicRatingSystem = new Dictionary();
18
19 public Dictionary dic_Martix = new Dictionary();
20
21 public HashSet hash_items = new HashSet();
22
23 #region 接收一个使用者的打分记录
24 ///

25 /// 接收一个使用者的打分记录
26 ///

27 /// 28 public void AddUserRatings(IDictionary> userRatings)
29 {
30 foreach (var user1 in userRatings)
31 {
32 //遍历所有的 Item
33 foreach (var item1 in user1.Value)
34 {
35 //该产品的编号（具有唯一性）
36 int item1Id = item1.ProductID;
37
38 //该专案的评分
39 float item1Rating = item1.Score;
40
41 //将产品编号字存放在 hash 表中
42 hash_items.Add(item1.ProductID);
43
44 foreach (var user2 in userRatings)
45 {
46 //再次遍历 item，用于计算俩俩 Item 之间的差值
47 foreach (var item2 in user2.Value)
48 {
49 //过滤掉同名的专案
50 if (item2.ProductID <= item1Id) 51 continue; 52 53 //该产品的名字 54 int item2Id = item2.ProductID; 55 56 //该专案的评分 57 float item2Rating = item2.Score; 58 59 Rating ratingDiff; 60 61 //用表的形式构建矩阵 62 var key = Tools.GetKey(item1Id, item2Id); 63 64 //将俩俩 Item 的差值存放到 Rating 中 65 if (dic_Martix.Keys.Contains(key)) 66 ratingDiff = dic_Martix[key]; 67 else 68 { 69 ratingDiff = new Rating(); 70 dic_Martix[key] = ratingDiff; 71 } 72 73 //方便以后以后 userrating 的编辑操作，（add) 74 if (!ratingDiff.hash_user.Contains(user1.Key)) 75 { 76 //value 储存差值 77 ratingDiff.Value += item1Rating - item2Rating; 78 79 //说明计算过一次 80 ratingDiff.Freq += 1; 81 } 82 83 //记录操作人的 ID，方便以后再次新增评分 84 ratingDiff.hash_user.Add(user1.Key); 85 } 86 } 87 } 88 } 89 } 90 #endregion 91 92 #region 根据矩阵的值，预测出该 Rating 中的值 93 ///

94 /// 根据矩阵的值，预测出该 Rating 中的值
95 ///

96 /// 97 ///
98 public IDictionary Predict(List userRatings)
99 {
100 Dictionary predictions = new Dictionary();
101
102 var productIDs = userRatings.Select(i => i.ProductID).ToList();
103
104 //回圈遍历_Items 中所有的 Items
105 foreach (var itemId in this.hash_items)
106 {
107 //过滤掉不需要计算的产品编号
108 if (productIDs.Contains(itemId))
109 continue;
110
111 Rating itemRating = new Rating();
112
113 // 内层遍历 userRatings
114 foreach (var userRating in userRatings)
115 {
116 if (userRating.ProductID == itemId)
117 continue;
118
119 int inputItemId = userRating.ProductID;
120
121 //获取该 key 对应专案的两组 AVG 的值
122 var key = Tools.GetKey(itemId, inputItemId);
123
124 if (dic_Martix.Keys.Contains(key))
125 {
126 Rating diff = dic_Martix[key];
127
128 //关键点：运用公式求解（这边为了节省空间，对角线两侧的值呈现奇函式的特性）
129 itemRating.Value += diff.Freq * (userRating.Score + diff.AverageValue * ((itemId < inputItemId) ? 1 : -1)); 130 131 //关键点：运用公式求解累计每两组的人数 132 itemRating.Freq += diff.Freq; 133 } 134 } 135 136 predictions.Add(itemId, itemRating.AverageValue); 137 } 138 139 return predictions; 140 } 141 #endregion 142 } 143 #endregion 144 145 #region 工具类 146 ///

147 /// 工具类
148 ///

149 public class Tools
150 {
151 public static string GetKey(int Item1Id, int Item2Id)
152 {
153 return (Item1Id < Item2Id) ? Item1Id + "->” + Item2Id : Item2Id + “->” + Item1Id;
154 }
155 }
156 #endregion
157 }

4: 测试类 Program
这里我们灌入了 userid=1000，2000，3000 的这三个人，然后我们预测 userID=3000 这个人对 “彩电” 的打分会是多少？

1 public class Program
2 {
3 static void Main(string[] args)
4 {
5 SlopeOne test = new SlopeOne();
6
7 Dictionary> userRating = new Dictionary>();
8
9 //第一位使用者
10 List list = new List()
11 {
12 new Product(){ ProductID=1, ProductName=” 洗衣机”,Score=5},
13 new Product(){ ProductID=2, ProductName=” 电冰箱”, Score=10},
14 new Product(){ ProductID=3, ProductName=” 彩电”, Score=10},
15 new Product(){ ProductID=4, ProductName=” 空调”, Score=5},
16 };
17
18 userRating.Add(1000, list);
19
20 test.AddUserRatings(userRating);
21
22 userRating.Clear();
23 userRating.Add(1000, list);
24
25 test.AddUserRatings(userRating);
26
27 //第二位使用者
28 list = new List()
29 {
30 new Product(){ ProductID=1, ProductName=” 洗衣机”,Score=4},
31 new Product(){ ProductID=2, ProductName=” 电冰箱”, Score=5},
32 new Product(){ ProductID=3, ProductName=” 彩电”, Score=4},
33 new Product(){ ProductID=4, ProductName=” 空调”, Score=10},
34 };
35
36 userRating.Clear();
37 userRating.Add(2000, list);
38
39 test.AddUserRatings(userRating);
40
41 //第三位使用者
42 list = new List()
43 {
44 new Product(){ ProductID=1, ProductName=” 洗衣机”, Score=4},
45 new Product(){ ProductID=2, ProductName=” 电冰箱”, Score=10},
46 new Product(){ ProductID=4, ProductName=” 空调”, Score=5},
47 };
48
49 userRating.Clear();
50 userRating.Add(3000, list);
51
52 test.AddUserRatings(userRating);
53
54 //那么我们预测 userID=3000 这个人对 “彩电” 的打分会是多少？
55 var userID = userRating.Keys.FirstOrDefault();
56 var result = userRating[userID];
57
58 var predictions = test.Predict(result);
59
60 foreach (var rating in predictions)
61 Console.WriteLine(“ProductID= ” + rating.Key + ” Rating: ” + rating.Value);
62 }
63 }