1:- module(crp,
2 [ empty_classes/1
3 , dec_class//3
4 , inc_class//1
5 , remove_class//1
6 , add_class//2
7
8 , crp_prob/5
9 , crp_sample/5
10 , crp_sample_obs/7
11 , crp_sample_obs/8
12 , crp_sample_rm/5
13
14 , dp_sampler_teh/3
15 , py_sampler_teh/4
16 ]).
49% :- use_module(library(dcg_core)). 50% :- use_module(library(dcg_macros)). 51:- use_module(library(apply_macros)). 52:- use_module(library(plrand), [spawn/3, crp_prob/5, crp_sample/5, crp_sample_obs/8, crp_sample_rm/5]).
old(X,ID)
, where X is an old value and C is the class index.
Operates in random state DCG.
of the action choosen.old(N)
where N is the class index. crp_sample_obs//6 additionally returns the
probability of the observation, equivalent to calling crp_prob with X BEFORE
calling crp_sample_obs//5.
Operates in random state DCG.
80crp_sample_obs(GEM, Classes, X, PBase, Action) -->
81 crp_sample_obs(GEM, Classes, X, PBase, Action, _).
91% --------------------------------------------------------------------------------
92% classes data structure (basic CRP stuff)
98empty_classes(classes(0,[],[])).
105dec_class(N,CI,X,classes(K,C1,Vs),classes(K,C2,Vs)) :- dec_nth(N,CI,C1,C2), nth1(N,Vs,X). 106dec_nth(1,Y,[X|T],[Y|T]) :- succ(Y,X). 107dec_nth(N,Y,[X|T1],[X|T2]) :- succ(M,N), dec_nth(M,Y,T1,T2).
112inc_class(N,classes(K,C1,V),classes(K,C2,V)) :- inc_nth(N,C1,C2). 113inc_nth(1,[X|T],[Y|T]) :- succ(X,Y). 114inc_nth(N,[X|T1],[X|T2]) :- succ(M,N), inc_nth(M,T1,T2).
120remove_class(N,classes(K1,C1,V1),classes(K2,C2,V2)) :-
121 remove_nth(N,C1,C2),
122 remove_nth(N,V1,V2),
123 succ(K2,K1).
128add_class(X,N2,classes(N1,C1,V1),classes(N2,C2,V2)) :- 129 succ(N1,N2), 130 append(C1,[1],C2), 131 append(V1,[X],V2). 132 133 134remove_nth(1,[_|T],T). 135remove_nth(N,[Y|T1],[Y|T2]) :- 136 ( var(N) 137 -> remove_nth(M,T1,T2), succ(M,N) 138 ; succ(M,N), remove_nth(M,T1,T2) 139 ). 140 141 142% --------------------------------------------------------------- 143% PARAMETER SAMPLING 144% Initialisers in Prolog, samplers written in C.
gem_prior
arguments must be of the form dp(_)
.
Prior specifies the Gamma distribution prior for the concentration parameter,
as gamma(a,b)
, where a is the shape parameter and b is the rate parameter
(ie the inverse of the scale parameter).
153dp_sampler_teh( gamma(A,B), CX, plrand:sample_dp_teh(ApSumKX,B,NX)) :-
154 maplist(sumlist,CX,NX),
155 maplist(length,CX,KX),
156 sumlist(KX,SumKX),
157 ApSumKX is A+SumKX.
gem_prior
arguments must be of the form py(_,_)
.
See dp_sampler_teh/3 for tha description of the gamma_prior type. DiscPr is a
Beta distribution prior for the concentration parameter.166py_sampler_teh( ThPrior, DiscPrior, CountsX, Sampler) :- 167 Sampler = plrand:sample_py_teh( ThPrior, DiscPrior, CountsX). 168 169 170/* ---------------------------------------------------------------------------------------i 171 PURE PROLOG VERSIONS (not including MCMC parameter samplers) 172 173:- use_module(library(math), [sub/3, equal/3, stoch/3, mul/3]). 174 175sample_discrete(O,P,Y) --> {length(P,N)}, plrand:sample_Discrete(N,P,X), {nth1(X,O,Y)}. 176 177crp_prob( Alpha, classes(_,Counts,Vals), X, PProb, P) :- 178 counts_dist( Alpha, Counts, Counts1), 179 stoch( Counts1, Probs, _), 180 maplist( equal(X), Vals, Mask), 181 maplist( mul, [PProb | Mask], Probs, PostProbs), 182 sumlist( PostProbs, P). 183 184%% crp_sample( +GEM:gem_model, +Classes:classes(A), -A:action(A))// is det. 185% 186% Sample a new value from CRP, Action A is either new, which means 187% that the user should sample a new value from the base distribtion, 188% or old(X,C), where X is an old value and C is the index of its class. 189% Operates in random state DCG. 190crp_sample( Alpha, classes(_,Counts,Vals), Action, RS1, RS2) :- 191 counts_dist(Alpha, Counts, Counts1), 192 sample_discrete(Counts1,Z,RS1,RS2), 193 ( Z>1 -> succ(C,Z), nth1(C,Vals,X), Action=old(X,C) 194 ; Action=new). 195 196 197%% crp_sample_obs( +GEM:gem_model, +Classes:classes(A), +X:A, +PProb:float, -A:action)// is det. 198% 199% Sample class appropriate for observation of value X. PProb is the 200% base probability of X from the base distribution. Action A is new 201% or old(Class). 202% Operates in random state DCG. 203crp_sample_obs( Alpha, classes(_,Counts,Vals), X, ProbX, A, RS1, RS2) :- 204 counts_dist( Alpha, Counts, [CNew|Counts1]), 205 PNew is CNew*ProbX, 206 maplist( posterior_count(X),Vals,Counts1,Counts2), 207 sample_discrete( [PNew|Counts2], Z, RS1, RS2), 208 (Z=1 -> A=new; succ(C,Z), A=old(C)). 209 210 211%% crp_sample_rm( +Classes:classes(A), +X:A, -C:natural)// is det. 212% 213% Sample appropriate class from which to remove value X. 214% Operates in random state DCG. 215crp_sample_rm( classes(_,Counts,Vals), X, Class, RS1, RS2) :- 216 maplist(posterior_count(X),Vals,Counts,Counts1), 217 sample_discrete( Counts1, Class, RS1, RS2). 218 219 220posterior_count(X,Val,Count,PC) :- X=Val -> PC=Count; PC=0. 221 222% ----------------------------------------------------------- 223% Dirichlet process and Pitman-Yor process 224% pseudo-counts models. 225 226counts_dist(dp(Alpha),Counts,[Alpha|Counts]) :- !. 227counts_dist(py(_,_),[],[1]) :- !. 228counts_dist(py(Alpha,Discount),Counts,[CNew|Counts1]) :- !, 229 length(Counts,K), 230 CNew is Alpha+Discount*K, 231 maplist(sub(Discount),Counts,Counts1). 232 233*/
Chinese Restaurant Process utilities
This module provides some building blocks for implementing a family of random processes related to Dirichlet processes, including Pitman Yor processe, the Chinese Restaurant process, and the stick breaking model (GEM). The Dirichlet processes takes a single concentration parameter, representated as
dp(Conc)
, while the Pitman Yor process takes a concentration parameter and a discount parameter, representated aspy(Conc,Disc)
.This may seems like a very low-level library for building CRPs, leaving a lot for the implemeenter to do, but this is intentional, to allow the implementer freedom to decide how to manage the states (terms of type
classes(_)
) of one or more CRPs, as well as the state of the random generator, in whatever way is most appropriate. See the the example implementation oftest_crp.pl
for one way to do this. */