forked from douban/dpark
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmap_sim.cpp
More file actions
135 lines (120 loc) · 3.5 KB
/
map_sim.cpp
File metadata and controls
135 lines (120 loc) · 3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/**
* @file map_sim.cpp
* @author Changsheng Jiang <jiangzuoyan@gmail.com>
* @date Fri Oct 21 17:07:48 2011
*
* @brief map similarity
*
*
*/
#include "douban/map_utility.hpp"
#include <boost/python.hpp>
#include <boost/python/stl_iterator.hpp>
#include <douban/fmtstr.hpp>
namespace bpy = boost::python;
namespace {
template <class K, class Func>
void input_map(
K /**/,
bpy::object &A, Func func) {
bpy::stl_input_iterator<bpy::object> first(A), last;
while (first != last) {
K k = bpy::extract<K>(*first);
bpy::object v = A[*first];
func(k, v);
++first;
}
}
} // namespace anonymous
// return (A * B' ktop by row)
bpy::object map_sim(
bpy::object R, bpy::object A, bpy::object B, size_t ktop) {
douban::map_type<size_t, size_t, double>::type a;
douban::map_type<size_t, size_t, double>::type b;
douban::map_type<size_t, std::vector< std::pair<size_t, double> > >::type sims;
auto input_map2 = [&](
bpy::object &A,
douban::map_type<size_t, size_t, double>::type &a) {
input_map(
(size_t)0, A,
[&](size_t k, bpy::object v) {
auto &d = a[k];
input_map(k, v, [&](size_t i, bpy::object vv) {
double x = bpy::extract<double>(vv);
d[i] = x;
});
});
};
input_map2(A, a);
input_map2(B, b);
b = douban::map_reverse2(b);
auto sim_cmp = [](const std::pair<size_t, double> &a,
const std::pair<size_t, double> &b) {
return douban::cmp(a.second, b.second);
};
auto insert_sim = [&](
size_t f, std::vector<std::pair<size_t, double> > &vsim, size_t t, double s) {
if (f == t) return; // ignore self sim.
auto elm = std::make_pair(t, s);
if (vsim.size() < (size_t)ktop) {
vsim.push_back(elm);
if (vsim.size() == (size_t)ktop) {
douban::heapify(vsim.begin(), vsim.end(), sim_cmp);
}
} else {
if (sim_cmp(vsim[0], elm) < 0) {
vsim[0] = elm;
douban::heap_shiftdown(vsim.begin(), vsim.end(), vsim.begin(), sim_cmp);
}
}
};
input_map(
(size_t)0, R,
[&](size_t n, bpy::object v) {
bpy::stl_input_iterator<bpy::object> first(v), last;
auto &d = sims[n];
while (first != last) {
bpy::object elm = *first;
size_t j = bpy::extract<size_t>(elm[0]);
double s = bpy::extract<double>(elm[1]);
insert_sim(n, d, j, s);
++first;
}
});
BOOST_FOREACH (auto &irow, a) {
auto i = irow.first;
douban::map_type<size_t, double>::type isim;
BOOST_FOREACH (auto &kv, irow.second) {
auto k = kv.first;
auto av = kv.second;
auto it = b.find(k);
if (it == b.end()) continue;
BOOST_FOREACH (auto &jv, it->second) {
auto j = jv.first;
auto bv = jv.second;
douban::set_default(isim, j, 0.) += av * bv;
}
}
auto &vsim = sims[i];
BOOST_FOREACH(auto &js, isim) {
insert_sim(i, vsim, js.first, js.second);
}
}
bpy::dict ret;
BOOST_FOREACH (auto &ns, sims) {
auto n = ns.first;
bpy::list l;
BOOST_FOREACH (auto &js, ns.second) {
l.append(bpy::make_tuple(js.first, js.second));
}
ret[n] = l;
}
// douban::fmterrlnt("A=(#%s, nnz=%s), B=(#%s, nnz=%s), ktop=%s",
// a.size(), douban::map_size2(a),
// b.size(), douban::map_size2(b),
// ktop);
return ret;
}
BOOST_PYTHON_MODULE(map_sim) {
bpy::def("map_sim", &map_sim, "map_sim(A, B, ktop)");
};