38:- use_module(library(mcintyre)). 39
40:- if(current_predicate(use_rendering/1)). 41:- use_rendering(c3). 42:- use_rendering(graphviz). 43:- endif. 44
45:- mc. 46
47:- begin_lpad. 48
53
54hmm(O):-hmm(_,O).
57
58hmm(S,O):-trans(start,Q0,[]),hmm(Q0,[],S0,O),reverse(S0,S).
61
62hmm(Q,S0,S,[L|O]):-
63 trans(Q,Q1,S0),
64 out(Q,L,S0),
65 hmm(Q1,[Q|S0],S,O).
68
69hmm(_,S,S,[]).
71
76
77hmm2(O):-hmm2(_,O).
80
81hmm2(S,O):-trans2(start,start,Q0,[]),hmm2(start,Q0,[],S0,O),reverse(S0,S).
84
85hmm2(Q0,Q,S0,S,[L|O]):-
86 trans2(Q0,Q,Q1,S0),
87 out2(Q0,Q,L,S0),
88 hmm2(Q,Q1,[Q|S0],S,O).
91
92hmm2(_,_,S,S,[]).
94
95trans(S0,S1,H):-
96 findall((S,P),pc_c(S,S0,P),L),
97 append(L0,[(LastS,_P)],L),
98 foldl(pick_next_state(S0,H),L0,(1,_),(_,S1)),
99 (var(S1)->
100 S1=LastS
101 ;
102 true
103 ).
104
105pick_next_state(_S0,_H,_L,(P0,V0),(P0,V0)):-
106 nonvar(V0).
107
108pick_next_state(S0,H,(S,P),(P0,V0),(P1,V1)):-
109 var(V0),
110 PF is P/P0,
111 (prob_fact_state(S0,S,H,PF)->
112 P1=PF,
113 V1=S
114 ;
115 P1 is P0*(1-PF),
116 V1=V0
117 ).
118
119prob_fact_state(_,_,_,P):P.
120
121out(S0,W,H):-
122 findall((W,P),pw_c(W,S0,P),L),
123 append(L0,[(LastW,_P)],L),
124 foldl(pick_word(S0,H),L0,(1,_),(_,W)),
125 (var(W)->
126 W=LastW
127 ;
128 true
129 ).
130
131pick_word(_S0,_H,_L,(P0,V0),(P0,V0)):-
132 nonvar(V0).
133
134pick_word(S0,H,(W,P),(P0,V0),(P1,V1)):-
135 var(V0),
136 PF is P/P0,
137 (prob_fact_word(S0,W,H,PF)->
138 P1=PF,
139 V1=W
140 ;
141 P1 is P0*(1-PF),
142 V1=V0
143 ).
144
145prob_fact_word(_,_,_,P):P.
146
147trans2(S0,S1,S2,H):-
148 findall((S,P),pc_cc(S,S0,S1,P),L),
149 append(L0,[(LastS,_P)],L),
150 foldl(pick_next_state2(S0,S1,H),L0,(1,_),(_,S2)),
151 (var(S2)->
152 S2=LastS
153 ;
154 true
155 ).
156
157pick_next_state2(_S0,_S1,_H,_L,(P0,V0),(P0,V0)):-
158 nonvar(V0).
159
160pick_next_state2(S0,S1,H,(S,P),(P0,V0),(P1,V1)):-
161 var(V0),
162 PF is P/P0,
163 (prob_fact_state2(S0,S1,S,H,PF)->
164 P1=PF,
165 V1=S
166 ;
167 P1 is P0*(1-PF),
168 V1=V0
169 ).
170
171prob_fact_state2(_,_,_,_,P):P.
172
173out2(S0,S1,W,H):-
174 findall((W,P),pw_cc(W,S0,S1,P),L),
175 append(L0,[(LastW,_P)],L),
176 foldl(pick_word2(S0,S1,H),L0,(1,_),(_,W)),
177 (var(W)->
178 W=LastW
179 ;
180 true
181 ).
182
183pick_word2(_S0,_S1,_H,_L,(P0,V0),(P0,V0)):-
184 nonvar(V0).
185
186pick_word2(S0,S1,H,(W,P),(P0,V0),(P1,V1)):-
187 var(V0),
188 PF is P/P0,
189 (prob_fact_word2(S0,S1,W,H,PF)->
190 P1=PF,
191 V1=W
192 ;
193 P1 is P0*(1-PF),
194 V1=V0
195 ).
196
197prob_fact_word2(_,_,_,_,P):P.
198
199
200
201:- end_lpad. 202
203
232pc(C,P) :-
233 fc(C,F),
234 tokens(N),
235 P is F/N.
236
238pc_c(C,start,P):-!,
239 fc(C,F1),
240 tokens(F2),
241 P is F1/F2.
242
243pc_c(C2,C1,P) :-
244 fcc(C1,C2,F1),
245 fc(C1,F2),
246 P is F1/F2.
247
248pc_c(C2,C1,0) :-
249 \+ fcc(C1,C2,_).
250
251pc_cc(C,start,start,P):-!,
252 pc_c(C,start,P).
253
254pc_cc(C,start,C1,P):-!,
255 pc_c(C,C1,P).
256
257
259pc_cc(C3,C1,C2,P) :-
260 fccc(C1,C2,C3,F1),
261 fcc(C1,C2,F2),
262 P is F1/F2.
263
264pc_cc(C3,C1,C2,0) :-
265 \+ fccc(C1,C2,C3,_).
266
331pw_c(W,C,P) :-
332 fwc(W,C,F1),
333 fc(C,F2),
334 \+ open(C),
335 P is F1/F2.
336
337pw_c(W,C,P) :-
338 fwc(W,C,F1),
339 fc(C,F2),
340 open(C),
341 tokens(N),
342 P is F1/(F2+(F2/N)).
343
344pw_c(W,C,P) :-
345 \+ fwc(W,_,_),
346 open(C),
347 tokens(N),
348 P is 1/N.
349
350pw_cc(W,C1,C2,P) :-
351 fwcc(W,C1,C2,F1),
352 hfreq(F),
353 F1 > F,
354 fcc(C1,C2,F2),
355 P is F1/F2.
356
357pw_cc(W,C1,C2,P) :-
358 pw_c(W,C2,P),
360 \+ (fwcc(W,C1,C2,F1), hfreq(F), F1 > F).
361
362open(ab).
363open(in).
364open(jj).
365open(nn).
366open(pc).
367open(p).
368open(rg).
369open(uo).
370open(vb).
371
372hfreq(0).
373
383fwc('.', dl, 2).
384fwc(',', dl, 1).
385fwc(a, dt, 2).
386fwc(and, cn, 2).
387fwc(beans, nn, 1).
388fwc(can, nn, 2).
389fwc(can, vb, 3).
390fwc(eat, vb, 1).
391fwc(fire, nn, 2).
392fwc('I', pn, 1).
393fwc(in, pp, 1).
394fwc(is, vb, 1).
395fwc(light, nn, 1).
396fwc(light, vb, 1).
397fwc('Now', ab, 1).
398fwc(of, pp, 2).
399fwc(open, vb, 1).
400fwc(open, jj, 1).
401fwc(the, dt, 3).
402fwc(we, pn, 1).
403fwc(you, pn, 1).
404
405fwcc('.', dl, nn, 2).
406fwcc(',', jj, dl,1).
407fwcc(a, vb, dt, 2).
408fwcc(and, nn,cn,1).
409fwcc(and, dl,cn, 1).
410fwcc(beans, pp,nn, 1).
411fwcc(can, dt,nn,2).
412fwcc(can, pn,vb, 3).
413fwcc(eat, vb,vb,1).
414fwcc(fire, dt,nn, 2).
416fwcc(in, vb,pp, 1).
417fwcc(is, nn,vb, 1).
418fwcc(light, dt,nn, 1).
419fwcc(light, vb, vb,1).
420fwcc('Now', dl,ab, 1).
421fwcc(of, nn,pp, 2).
422fwcc(open, vb, vb,1).
423fwcc(open, vb,jj,1).
424fwcc(the, ab,dt, 1).
425fwcc(the,pp, dt, 2).
426fwcc(we, cn,pn,1).
427fwcc(you, cn,pn, 1).
428
429
432
433fc(ab, 1).
434fc(cn, 2).
435fc(dl, 3).
436fc(dt, 5).
437fc(jj, 1).
438fc(nn, 6).
439fc(pn, 3).
440fc(pp, 3).
441fc(vb, 7).
442
443
446
447
448fcc(ab, dt, 1).
449fcc(cn, pn, 2).
450fcc(dl, ab, 1).
451fcc(dl, cn, 1).
452fcc(dt, nn, 5).
453fcc(jj, dl, 1).
454fcc(nn, cn, 1).
455fcc(nn, dl, 2).
456fcc(nn, pp, 2).
457fcc(nn, vb, 1).
458fcc(pn, vb, 3).
459fcc(pp, dt, 2).
460fcc(pp, nn, 1).
461fcc(vb, dt, 2).
462fcc(vb, jj, 1).
463fcc(vb, pp, 1).
464fcc(vb, vb, 3).
465
466
467fccc(ab, dt, nn, 1).
468fccc(cn, pn, vb, 2).
469fccc(dl, ab, dt, 1).
470fccc(dl, cn, pn, 1).
471fccc(dt, nn, cn, 1).
472fccc(dt, nn, dl, 1).
473fccc(dt, nn, pp, 2).
474fccc(dt, nn, vb, 1).
475fccc(jj, dl, cn, 1).
476fccc(nn, cn, pn, 1).
477fccc(nn, dl, ab, 1).
478fccc(nn, pp, dt, 1).
479fccc(nn, pp, nn, 1).
480fccc(nn, vb, jj, 1).
481fccc(pn, vb, vb, 3).
482fccc(pp, nn, dl, 1).
483fccc(pp, dt, nn, 2).
484fccc(vb, dt, nn, 2).
485fccc(vb, jj, dl, 1).
486fccc(vb, pp, d1, 1).
487fccc(vb, vb, dt, 2).
488fccc(vb, vb, pp, 1).
489
490classes :-
491 setof(C,F^fc(C,F),Cs),
492 length(Cs,N),
493 assert(classes(N)).
494
495tokens :-
496 bagof(F,W^C^fwc(W,C,F),Fs),
497 sum_list(Fs,N),
498 assert(tokens(N)).
499
500types :-
501 setof(W,C^F^fwc(W,C,F),Ws),
502 length(Ws,N),
503 assert(types(N)).
504
505:- tokens. 506:- types. 507:- classes.
?-
mc_sample_arg(hmm(S,['I',can,can,a,can]),1000,S,O)
. % sample the state sequence corresonding to the phrase "I can can a can" % the most frequent state sequence is an approximate POS tagging for the % sentence. It corresponds to the Viterbi path of the HMM. % expected result: the most frequent tagging should be [pn,vb,vb,dt,nn] ?-mc_sample_arg(hmm2(S,['I',can,can,a,can]),1000,S,O)
. % as above but for the second order model % ?-mc_sample_arg(hmm(S,['I',can,can,a,can]),1000,S,O)
,argbar(O,C)
. ?-mc_sample_arg(hmm2(S,['I',can,can,a,can]),1000,S,O)
,argbar(O,C)
. ?-mc_sample_arg(hmm(S,[can, the ,can, do, the, can ,can]),10000,S,O)
,argbar(O,C)
. % example by Douglas R. Miles ?-mc_sample_arg(hmm(S,[can, the ,can, do, the, can ,can]),10000,S,O)
,argbar(O,C)
. */