-
Notifications
You must be signed in to change notification settings - Fork 0
/
dense_R.m
75 lines (58 loc) · 2.18 KB
/
dense_R.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
function [ R ] = dense_R( data_raw, item_limit, user_limit, max_item_reviews, min_user_reviews )
%DENSE_R Build a dense item-by-user matrix from raw review columns.
%   R = DENSE_R(data_raw, item_limit, user_limit, max_item_reviews, min_user_reviews)
%
%   Inputs
%     data_raw          - matrix with one column per review. Row 1 holds the
%                         user id, row 2 the item id, row 3 the value that is
%                         stored in R. Ids are shifted by +1 to form 1-based
%                         indices, so 0 is the smallest supported id.
%     item_limit        - maximum number of items (rows of R) to keep.
%     user_limit        - maximum number of users (columns of R) to keep.
%     max_item_reviews  - items with more reviews than this are discarded.
%     min_user_reviews  - users need at least this many reviews on the kept
%                         items to be eligible.
%
%   Output
%     R - (kept items) x (kept users) matrix of doubles. R(i,u) is the row-3
%         value of the last column of data_raw that maps to kept item i and
%         kept user u, or 0 when there is no such column.
%
%   Selection rules
%     * Items: review count strictly greater than 1 and at most
%       max_item_reviews; the item_limit most-reviewed of those are kept,
%       row 1 of R being the most-reviewed one.
%     * Users: chosen at random (randperm) among the eligible users, so the
%       column order, and which users survive when more than user_limit are
%       eligible, varies between calls unless the random generator is seeded.

    % No reviews at all: return an empty matrix rather than failing while
    % indexing rows of an empty input.
    if isempty(data_raw)
        R = zeros(0, 0);
        return;
    end

    % Shift ids to 1-based indices once; every later step reuses them.
    user_ids = double(data_raw(1,:)) + 1;
    item_ids = double(data_raw(2,:)) + 1;
    num_users = max(user_ids);
    num_items = max(item_ids);

    % Number of reviews per item.
    item_counts = accumarray(item_ids(:), 1, [num_items 1]).';

    % Candidate items: more than one review, but not more than the cap.
    candidates = find(item_counts > 1 & item_counts <= max_item_reviews);
    [~, order] = sort(item_counts(candidates), 2, 'descend');
    top_items = candidates(order);          % item indices, most-reviewed first
    item_limit = min(item_limit, numel(top_items));

    % item_map(id) = 0 if the item is dropped, otherwise its row in R.
    item_map = zeros(1, num_items);
    item_map(top_items(1:item_limit)) = 1:item_limit;

    % Per-user review counts, counting only reviews of kept items.
    new_items = item_map(item_ids);
    on_kept_item = new_items ~= 0;
    user_counts = accumarray(user_ids(:), double(on_kept_item(:)), [num_users 1]).';

    % Eligible users in random order; keep at most user_limit of them.
    eligible = find(user_counts >= min_user_reviews);
    eligible = eligible(randperm(numel(eligible)));
    final_user_count = min(numel(eligible), user_limit);

    % user_map(id) = 0 if the user is dropped, otherwise its column in R.
    user_map = zeros(1, num_users);
    user_map(eligible(1:final_user_count)) = 1:final_user_count;
    new_users = user_map(user_ids);

    % Fill R from the surviving reviews. Columns are visited in their
    % original order so that, for a repeated (user, item) pair, the last
    % occurrence in data_raw is the one that ends up in R.
    R = zeros(item_limit, final_user_count);
    for col_i = find(on_kept_item & new_users ~= 0)
        R(new_items(col_i), new_users(col_i)) = data_raw(3, col_i);
    end
end