2:- use_module(library(sgml)). 3:- use_module(library(lists),[member/2,append/3,reverse/2]). 4
%!  test(+File)
%
%   Debugging aid.  Parses the XML file File, looks up a VNCLASS
%   element in the resulting DOM, reads its 'ID' attribute and hands
%   the class content to check/2, which lists the primary description
%   of every frame on the current output.
test(File):-
   load_xml_file(File,DOM),
   elements(DOM,['VNCLASS'],f(ClassAttrs,ClassContent)),
   value(ClassAttrs,'ID',ClassID),
   check(ClassID,ClassContent).
11
%!  verbnet2prolog(+File)
%
%   Parses the XML file File, looks up a VNCLASS element in the DOM,
%   reads its 'ID' attribute and calls members/4 on the class content.
%   The class starts with no inherited frames (the empty list); File
%   is passed along so it can be mentioned in the generated output.
verbnet2prolog(File):-
   load_xml_file(File,DOM),
   elements(DOM,['VNCLASS'],f(ClassAttrs,ClassContent)),
   value(ClassAttrs,'ID',ClassID),
   members(ClassContent,File,[],ClassID).
18
19
23
%!  pretty_print(+Nodes, +Indent)
%
%   Writes an indented outline of the DOM node list Nodes to the
%   current output.  An element(Name,Attrs,Children) node is printed
%   as "Name Attrs" after Indent spaces, followed by its children
%   indented three further columns.  Any other node is printed as
%   unknown:Node at the current indentation.
pretty_print([],_).
pretty_print([Node|Siblings],Indent):-
   tab(Indent),
   (  Node = element(Name,Attrs,Children)
   -> write(Name), write(' '), write(Attrs), nl,
      ChildIndent is Indent+3,
      pretty_print(Children,ChildIndent)
   ;  write(unknown:Node), nl
   ),
   pretty_print(Siblings,Indent).
35
36
40
%!  check(+ID, +Content)
%
%   Lists, on the current output, one line "ID: Primary" for every
%   FRAME found under FRAMES in Content, where Primary is the
%   'primary' attribute of the frame's DESCRIPTION element.  It then
%   does the same recursively for every VNSUBCLASS under SUBCLASSES,
%   using the subclass's own 'ID' attribute.  Always succeeds; no
%   bindings are left behind.
check(ID,Content):-
   % First pass: the frames of this class.
   forall(( elements(Content,['FRAMES','FRAME'],f(_,Frame)),
            elements(Frame,['DESCRIPTION'],f(Descr,_)),
            value(Descr,primary,Primary)
          ),
          ( write(ID), write(': '), write(Primary), nl )),
   % Second pass: descend into each subclass.
   forall(( elements(Content,['SUBCLASSES','VNSUBCLASS'],f(SubAttrs,SubContent)),
            value(SubAttrs,'ID',SubID)
          ),
          check(SubID,SubContent)).
55
56
60
%!  members(+Content, +File, +SuperFrames, +ClassID)
%
%   Processes one (sub)class whose child nodes are Content:
%
%     1. collects the 'name' attribute of every MEMBER under MEMBERS;
%     2. collects the content of every FRAME under FRAMES and appends
%        it to SuperFrames, the frames inherited from enclosing
%        classes;
%     3. calls frameMember/4 to pair every frame with every member;
%     4. recurses into every VNSUBCLASS under SUBCLASSES, passing the
%        combined frame list down as the new inherited frames.
%
%   A subclass whose processing fails is skipped; it does not make
%   the enclosing class fail.
members(Content,File,SuperFrames,ClassID):-
   findall(Name,
           (  elements(Content,['MEMBERS','MEMBER'],f(MemberAttrs,_)),
              value(MemberAttrs,name,Name)
           ),
           Names),
   findall(Frame,
           elements(Content,['FRAMES','FRAME'],f(_,Frame)),
           OwnFrames),
   append(SuperFrames,OwnFrames,AllFrames),
   frameMember(AllFrames,Names,ClassID,File),
   forall(( elements(Content,['SUBCLASSES','VNSUBCLASS'],f(SubAttrs,SubContent)),
            value(SubAttrs,'ID',SubID)
          ),
          ignore(members(SubContent,File,AllFrames,SubID))).
69
70
74
%!  frameMember(+Frames, +Names, +ID, +File)
%
%   Walks the list Frames and calls pairMemberFrame/4 once per frame,
%   each time with the complete list of member Names.  Fails as soon
%   as pairMemberFrame/4 fails for a frame.
frameMember([],_,_,_):- !.

frameMember([Frame|OtherFrames],Names,ID,File):-
   pairMemberFrame(Names,Frame,ID,File),
   frameMember(OtherFrames,Names,ID,File).
80
%!  pairMemberFrame(+Names, +Frame, +ID, +File)
%
%   Writes one line of the form
%
%       verbnet(Name, Primary, SubCatPat, SubCat,FID). %%% ID (File)
%
%   to the current output for every Name in Names, where
%
%     - Primary is the 'primary' attribute of Frame's DESCRIPTION;
%     - SubCatPat and SubCat are derived from the content of Frame's
%       SYNTAX element by subcatpat/3 and subcat/3;
%     - FID is the class ID split by formatID/2, with its first two
%       components (the part before the first separator, and that
%       separator) dropped.
%
%   Succeeds without looking at Frame when Names is empty.
%
%   Every value is computed before anything is printed.  If a frame
%   has no DESCRIPTION or SYNTAX, or the ID contains no separator,
%   the predicate therefore fails without leaving a truncated
%   "verbnet(" fragment in the output.  The values depend only on
%   Frame and ID, so they are computed once rather than per name.
pairMemberFrame([],_,_,_).

pairMemberFrame([Name|Names],Frame,ID,File):-
   elements(Frame,['DESCRIPTION'],f(Descr,_)),
   value(Descr,primary,Primary),
   elements(Frame,['SYNTAX'],f(_,Syntax)),
   subcatpat(Syntax,[],SubCatPat),
   subcat(Syntax,[],SubCat),
   atom_chars(ID,IDChars),
   formatID(IDChars,[_,_|FID]), !,
   pairMemberFrame_write([Name|Names],Primary,SubCatPat,SubCat,FID,ID,File).

%   pairMemberFrame_write(+Names, +Primary, +SubCatPat, +SubCat, +FID, +ID, +File)
%
%   Private helper: prints one verbnet/5 fact per member name.
pairMemberFrame_write([],_,_,_,_,_,_).

pairMemberFrame_write([Name|Names],Primary,SubCatPat,SubCat,FID,ID,File):-
   format('verbnet(~q, ~q, ~q, ~q,~q). %%% ~p (~p)~n',
          [Name,Primary,SubCatPat,SubCat,FID,ID,File]),
   pairMemberFrame_write(Names,Primary,SubCatPat,SubCat,FID,ID,File).
95
96
100
%!  formatID(+Chars, -Parts)
%
%   Splits the character list Chars at every '-' and '.' separator.
%   Parts alternates the converted chunks (see formatNumber/2) with
%   the separator characters themselves; for example the characters
%   of 'give-13.1' become [give,'-',13,'.',1].  A list without any
%   separator yields a one-element list.
formatID(Chars,[Chunk,Sep|Parts]):-
   % Scanning left to right, the first split whose pivot is a
   % separator is the one whose prefix contains no separator.
   append(ChunkChars,[Pivot|RestChars],Chars),
   member(Pivot,['-','.']), !,
   Sep = Pivot,
   formatNumber(ChunkChars,Chunk),
   formatID(RestChars,Parts).

formatID(Chars,[Chunk]):-
   formatNumber(Chars,Chunk).
110
%!  formatNumber(+Chars, -Value)
%
%   Converts the character list Chars to a number when its first
%   character is a decimal digit, and to an atom otherwise (the
%   empty list gives the empty atom).
%
%   A chunk that starts with a digit but is not a valid number, such
%   as the characters of '1a', is returned as an atom instead of
%   letting the syntax error raised by number_chars/2 abort the
%   conversion.
%
%   Note: digit-initial chunks are read with Prolog number syntax,
%   so whatever number_chars/2 accepts is returned as a number.
formatNumber(Chars,Value):-
   Chars = [First|_],
   member(First,['0','1','2','3','4','5','6','7','8','9']), !,
   catch(number_chars(Value,Chars),
         error(syntax_error(_),_),
         atom_chars(Value,Chars)).

formatNumber(Chars,Atom):-
   atom_chars(Atom,Chars).
118
122
%!  subcat(+SyntaxNodes, +Acc, -SubCat)
%
%   Folds cat/3 over SyntaxNodes, threading the accumulator Acc (each
%   step prepends its categories, so the accumulator is in reverse
%   order).  Once the nodes are exhausted, postproc/3 restores the
%   original order and merges bare prepositions, giving SubCat.
subcat([],Collected,SubCat):-
   postproc(Collected,[],SubCat).
subcat([Node|Nodes],Collected,SubCat):-
   cat(Node,Collected,Extended),
   subcat(Nodes,Extended,SubCat).
125
%!  subcatpat(+SyntaxNodes, +Acc, -Pattern)
%
%   Same traversal as subcat/3, but uses catpat/3, which records only
%   the category of each node and not its value.  The accumulator is
%   built in reverse and handed to postproc/3 at the end.
subcatpat([],Collected,Pattern):-
   postproc(Collected,[],Pattern).
subcatpat([Node|Nodes],Collected,Pattern):-
   catpat(Node,Collected,Extended),
   subcatpat(Nodes,Extended,Pattern).
128
129
133
%!  postproc(+Reversed, +Acc, -Result)
%
%   Reverses the category list Reversed onto Acc.  While doing so, it
%   collapses each adjacent pair "X,pp" of the reversed input (that
%   is, a bare pp directly followed by X in the final order) into a
%   single item, as listed in postproc_merge/2.
postproc([],Result,Result).
postproc([Item,pp|Rest],Acc,Result):-
   postproc_merge(Item,Merged), !,
   postproc(Rest,[Merged|Acc],Result).
postproc([Item|Rest],Acc,Result):-
   postproc(Rest,[Item|Acc],Result).

%   postproc_merge(?Item, ?Merged)
%
%   Merged replaces Item together with the bare pp that precedes it
%   in the final order.
postproc_merge(np,  pp).
postproc_merge(np:V,pp:V).
postproc_merge(s,   s).
postproc_merge(s:V, s:V).
140
144
%!  restr(+Content, ?Type)
%
%   True when Content is exactly one attribute-less SYNRESTRS element
%   that contains a childless SYNRESTR element whose attribute list
%   is ['Value'='+',type=Type].  Only the first such restriction is
%   considered.
restr([element('SYNRESTRS',[],Restrictions)],Type):-
   once(member(element('SYNRESTR',['Value'='+',type=Type],[]),Restrictions)).
148
%!  s_restr(?Type)
%
%   Syntactic restriction types for which cat/3 and catpat/3 classify
%   an NP element as a sentential argument (s) rather than as np.
%   The facts are grouped by the suffix of their names.

% *_comp
s_restr(that_comp).
s_restr(for_comp).
s_restr(wh_comp).

% *_ing and *_ppart
s_restr(acc_ing).
s_restr(oc_ing).
s_restr(ac_ing).
s_restr(be_sc_ing).
s_restr(np_omit_ing).
s_restr(np_ppart).
s_restr(np_p_ing).
s_restr(np_ing).

% *_extract
s_restr(how_extract).
s_restr(what_extract).

% *_inf and to_inf_rs
s_restr(wh_inf).
s_restr(what_inf).
s_restr(wheth_inf).
s_restr(oc_bare_inf).
s_restr(oc_to_inf).
s_restr(ac_to_inf).
s_restr(sc_to_inf).
s_restr(np_to_inf).
s_restr(vc_to_inf).
s_restr(rs_to_inf).
s_restr(to_inf_rs).
178
182
%!  cat(+Node, +Acc, -NewAcc)
%
%   Maps one child node of a SYNTAX element to its subcategorisation
%   item(s) and prepends them to Acc.  The first matching clause
%   wins:
%
%     - NP with a value and a restriction listed in s_restr/1 -> s:Value
%     - any other NP with only a value attribute             -> np:Value
%     - PREP without attributes                              -> pp
%     - PREP with a value                                    -> prep:Value
%     - LEX '[+be]'                                          -> lex:be
%     - LEX 'it[+be]'     -> lex:it then lex:be (prepended in reverse)
%     - LEX at / of                                          -> prep:at / prep:of
%     - any other LEX with a value                           -> lex:Value
%     - empty VERB / ADJ / ADV                               -> v / adj / adv
%     - anything else                                        -> unk:Node
cat(element('NP',[value=Role],Restrs),Acc,[s:Role|Acc]):-
   s_restr(Type), restr(Restrs,Type), !.
cat(element('NP',[value=Role],_),Acc,[np:Role|Acc]):- !.
cat(element('PREP',[],_),Acc,[pp|Acc]):- !.
cat(element('PREP',[value=Prep],_),Acc,[prep:Prep|Acc]):- !.
cat(element('LEX',[value='[+be]'],_),Acc,[lex:be|Acc]):- !.
cat(element('LEX',[value='it[+be]'],_),Acc,[lex:be,lex:it|Acc]):- !.
cat(element('LEX',[value=at],_),Acc,[prep:at|Acc]):- !.
cat(element('LEX',[value=of],_),Acc,[prep:of|Acc]):- !.
cat(element('LEX',[value=Word],_),Acc,[lex:Word|Acc]):- !.
cat(element('VERB',[],[]),Acc,[v|Acc]):- !.
cat(element('ADJ',[],[]),Acc,[adj|Acc]):- !.
cat(element('ADV',[],[]),Acc,[adv|Acc]):- !.
cat(Other,Acc,[unk:Other|Acc]):- !.
196
%!  catpat(+Node, +Acc, -NewAcc)
%
%   Value-free counterpart of cat/3: maps one child node of a SYNTAX
%   element to its bare category and prepends it to Acc.  The first
%   matching clause wins:
%
%     - NP with a restriction listed in s_restr/1 -> s
%     - any other NP                              -> np
%     - PREP without attributes                   -> pp
%     - PREP with a value                         -> prep
%     - LEX at / of                               -> prep
%     - any other LEX                             -> lex
%     - VERB / ADJ / ADV                          -> v / adj / adv
%     - anything else                             -> unk
catpat(element('NP',_,Restrs),Acc,[s|Acc]):-
   s_restr(Type), restr(Restrs,Type), !.
catpat(element('NP',_,_),Acc,[np|Acc]):- !.
catpat(element('PREP',[],_),Acc,[pp|Acc]):- !.
catpat(element('PREP',[value=_],_),Acc,[prep|Acc]):- !.
catpat(element('LEX',[value=at],_),Acc,[prep|Acc]):- !.
catpat(element('LEX',[value=of],_),Acc,[prep|Acc]):- !.
catpat(element('LEX',_,_),Acc,[lex|Acc]):- !.
catpat(element('VERB',_,_),Acc,[v|Acc]):- !.
catpat(element('ADJ',_,_),Acc,[adj|Acc]):- !.
catpat(element('ADV',_,_),Acc,[adv|Acc]):- !.
catpat(_,Acc,[unk|Acc]):- !.
208
209
213
%!  elements(+Nodes, +Path, -Found)
%
%   Enumerates, on backtracking and in document order, every element
%   reachable from the node list Nodes by following the tag names in
%   Path (a non-empty list, outermost tag first).  Found is
%   f(Attributes,Children) of the element named by the last tag.
%   Nodes that are not element/3 terms are skipped.
elements(Nodes,[Tag|SubPath],Found):-
   member(element(Tag,Attrs,Children),Nodes),
   (  SubPath = [],
      Found = f(Attrs,Children)
   ;  elements(Children,SubPath,Found)
   ).
217
218
222
%!  value(+Attributes, ?Name, ?Value)
%
%   Looks up an attribute in a list of Name=Value pairs.  Commits to
%   the first pair that unifies with Name=Value; fails if there is
%   none.
value(Attributes,Name,Value):-
   once(member(Name=Value,Attributes)).
225
226
230
%!  verbnet_dir(-Dir)
%
%   Directory that run/0 scans for '*.xml' files.  The trailing
%   slash is required because run/0 appends the wildcard to Dir with
%   atom_concat/3.
verbnet_dir('ext/VerbNet/').
232
233
237
%!  process(+Files)
%
%   Calls verbnet2prolog/1 on every file name in Files, in order.
%   Fails as soon as verbnet2prolog/1 fails for a file.
process([]).
process([XMLFile|Remaining]):-
   verbnet2prolog(XMLFile),
   process(Remaining).
243
244
248
%!  run
%
%   Entry point.  Checks that the directory given by verbnet_dir/1
%   exists and expands '*.xml' inside it to a list of files.  It then
%   writes the ':- dynamic verbnet/5.' declaration, followed by the
%   output of process/1 for those files, to the current output, and
%   finally terminates Prolog with halt/0.  If the directory does not
%   exist or processing fails, run/0 fails and halt/0 is not reached.
run:-
   verbnet_dir(Dir),
   exists_directory(Dir),
   atom_concat(Dir,'*.xml',Pattern),
   expand_file_name(Pattern,XMLFiles),
   write(':- dynamic verbnet/5.'), nl,
   process(XMLFiles),
   halt.

% Start the conversion as soon as this file is loaded.
:- run.