1 (* Thomas Hales, June 29, 2011,
2 implementation of the rank boost algorithm from
3 "An Efficient Boosting Algorithm for Combining Preferences"
4 Freund, Iyer, Schapire, and Singer.
6 The 'bottom' symbol _|_ is implemented by throwing an exception.
11 module Rank_boost = struct
13 (**********************************************************************)
16 let exp = Pervasives.exp;;
18 let abs = Pervasives.abs_float;;
20 let infinity = Pervasives.infinity;;
22 let neg_infinity = Pervasives.neg_infinity;;
26 let scale c = map ( ( *. ) c) ;;
28 let outer_col_mul d v =
29 map (map2 ( *. ) v) d;;
31 let outer_row_mul d v =
34 let sum = end_itlist (+.);;
38 outer_col_mul [[1.0;2.0];[3.0;4.0]] [7.0;11.0];;
39 outer_row_mul [[1.0;2.0];[3.0;4.0]] [7.0;11.0];;
41 (**********************************************************************)
44 let alpha_r r = 0.5 *. log ((1.0 +. r) /. (1.0 -. r));;
47 let total = sum (map sum d) in
48 map (scale (1.0 /. total)) d;;
50 let mk_distrib phi X =
51 let row y = map (fun x -> max 0.0 (phi y x)) X in
52 normalize(map row X);;
55 let mk_uniform_distribution n =
56 let row = replicate 1.0 n in
57 normalize (replicate row n) ;;
60 let update_distribution distrib alpha hs =
61 let h1 = map (fun t -> exp(alpha *. t)) hs in
62 let h2 = map (fun t -> 1.0 /. t) h1 in
63 let d =outer_col_mul distrib h2 in
64 let d = outer_row_mul d h1 in
68 let zf = mapfilter (fun (i,x) -> (i,f x)) zX in
69 sort (fun (_,f1) (_,f2) -> f1 > f2) zf;;
72 let row_sums = map sum distrib in
73 let col_sums = end_itlist (map2 (+.)) distrib in
74 map2 ( -. ) row_sums col_sums;;
77 might behave slightly incorrectly when the optimal f' is a repeated value of f.
78 There is no problem, if the ranking f is strict.
80 default_q currently doesn't get used in the end. *)
82 let weak_update R pi default_q (L,r,theta,q) (i',f') =
83 let L = L +. nth pi i' in
84 let q' = match default_q with
86 | None -> if (abs(L) > abs(L -. R)) then 0.0 else 1.0 in
87 if abs(L -. q *. R) > abs(r) then (L,L-. q*. R,f',q') else (L,r,theta,q);;
89 let weak_learn_one pi default_q zf =
90 let R = List.fold_left (fun s (i,_) -> s +. nth pi i) 0.0 zf in
91 List.fold_left (weak_update R pi default_q) (0.0,0.0,Pervasives.infinity,0.0) zf ;;
93 let weak_learn_all distrib ranks sfs =
94 let pi = mk_pi distrib in
95 let wl = weak_learn_one pi None in
96 let foldf (r,absr,fx,q,f) (zf,f') =
97 let (_,r',fx',q') = wl zf in
98 (if (abs r' > absr) then (r',abs r',fx',q',f') else (r,absr,fx,q,f)) in
99 let (r,_,theta,q,f) = List.fold_left foldf
100 (0.0,neg_infinity,0.0,0.0,(fun t ->0.0))
102 let alpha = alpha_r r in
105 let learned_h q theta f x =
106 try ( if f x >= theta then 1.0 else 0.0) with _ -> q;;
110 sort_zipf I (zipX [2.0;3.0;4.0]);;
111 sort_zipf (fun t -> if t < 3.0 then t else failwith "bad") (zipX [7.0;4.0;3.0;2.0;2.5;1.0]);;
112 mk_pi [[1.0;2.0];[3.0;4.0]];;
114 (**********************************************************************)
115 (* running the boost algorithm T times *)
119 ranks:(A->real) list, (ranking functions)
120 phi:A -> A-> float, feedback function.
121 t=number of iterations, typically 40-150 range. paper uses 40+n/10, n=length ranks.
122 We don't preset theta, we compute it as some value fx.
125 let rec rank_boost t ranks wts distrib sfs X =
126 if (t=List.length wts) then wts
128 let (alpha,q,theta,f)=weak_learn_all distrib ranks sfs in
129 let h = learned_h q theta f in
130 let distrib' = update_distribution distrib alpha (map h X) in
131 rank_boost t ranks ((alpha,q,theta,f)::wts) distrib' sfs X;;
133 let calc_wts X phi ranks t =
134 let distrib = mk_distrib phi X in
135 let zipX = zip (0--(List.length X - 1)) X in
136 let sfs = map (fun f -> sort_zipf f zipX) ranks in
137 rank_boost t ranks [] distrib sfs X;;
139 let evalf wts x = List.fold_left
140 (fun s (alpha,q,theta,f) -> s +. alpha *. learned_h q theta f x) 0.0 wts;;