3
7
% Load-time setup.
%
% Resolve '..' relative to the file currently being loaded and register the
% resulting directory under the path alias `candc`.
:- prolog_load_context(file, ThisFile),
   absolute_file_name('..', Root,
                      [relative_to(ThisFile), file_type(directory)]),
   asserta(user:file_search_path(candc, Root)).

% The `semlib` and `boxer` aliases are sub-directories of the candc root.
user:file_search_path(semlib, candc(lib)).
user:file_search_path(boxer,  candc(boxer)).

% Double-quoted text in this file is read as a list of character codes;
% the title/2, abb/2 and mistake/1 tables are written that way.
:- set_prolog_flag(double_quotes, codes).
17
21
22:- use_module(library(lists),[member/2,append/3]). 23:- use_module(library(readutil),[read_line_to_codes/2]). 24:- use_module(boxer(version),[version/1]). 25:- use_module(semlib(errors),[error/2,warning/2]). 26:- use_module(semlib(options),[option/2,parseOptions/2,setOption/3,
27 showOptions/1,setDefaultOptions/1]). 28
29
33
% tokkie/0: main driver, three alternatives tried in order.
%
%  1. Some option among --version / --help has the value `do`:
%     run version/0 and help/0.
%  2. Input and output streams can be opened: read the first line and hand
%     over to readLines/3.
%  3. Otherwise switch --help on and show the usage text.
tokkie :-
    option(Requested, do),
    member(Requested, ['--version', '--help']),
    !,
    version,
    help.

tokkie :-
    openInput(Source),
    openOutput(Sink),
    !,
    read_line_to_codes(Source, FirstLine),
    readLines(FirstLine, Source, Sink).

tokkie :-
    setOption(tokkie, '--help', do),
    !,
    help.
49
50
54
% readLines(+Line, +In, +Out)
%
% Line is the line most recently read from In (a code list, or the atom
% end_of_file).  Each line is tokenised with tok/3 and written to Out; the
% separator written after it is chosen by decideNewLine/3 once the following
% line is known.  Both streams are closed when end_of_file is reached.
readLines(end_of_file, In, Out) :-
    !,
    close(In),
    close(Out).

readLines(Line, In, Out) :-
    !,
    tok(Line, Tokenised, LastToken),
    format(Out, '~s', [Tokenised]),
    read_line_to_codes(In, Following),
    decideNewLine(Following, LastToken, Out),
    readLines(Following, In, Out).
65
66
70
% decideNewLine(+NextLine, +Last, +Out)
%
% Writes the separator that follows the line just emitted.  NextLine is the
% line read after it (or end_of_file); Last is the third argument returned
% by tok/3 for the emitted line.  First matching case wins:
%   - end of input                                   -> newline
%   - next line starts with two lower-case letters   -> single blank
%   - Last is a known title (title/1)                -> single blank
%   - Last is listed in mistake/1                    -> nothing
%   - anything else                                  -> newline
decideNewLine(end_of_file, _, Out) :-
    !,
    nl(Out).
decideNewLine([First,Second|_], _, Out) :-
    lower(First),
    lower(Second),
    !,
    write(Out, ' ').
decideNewLine(_, Last, Out) :-
    title(Last),
    !,
    write(Out, ' ').
decideNewLine(_, Last, _) :-
    mistake(Last),
    !.
decideNewLine(_, _, Out) :-
    nl(Out).
76
77
81
% tok(+Codes, -Tokenised, -Last)
%
% Entry point for one line.  Leading blanks (32) and tabs (9) are skipped,
% code 65533 (U+FFFD) is dropped with a warning, and the rest is handed to
% tok/4 with an empty "previous" accumulator.
tok([], [], []) :- !.
tok([65533|Codes], Out, Last) :-
    !,
    warning('skipping non-utf8 character', []),
    tok(Codes, Out, Last).
tok([32|Codes], Out, Last) :- !, tok(Codes, Out, Last).
tok([9|Codes],  Out, Last) :- !, tok(Codes, Out, Last).
tok(Codes, Out, Last) :-
    tok(Codes, [], Out, Last).

% tok(+Codes, +Prev, -Tokenised, -Last)
%
% Prev holds the codes seen since the last break, most recent first.  At each
% position the first applicable pattern/4 rule rewrites the input; when none
% applies, a blank is copied and resets Prev, and any other code is copied
% and pushed onto Prev.  Last is whatever Prev holds when the input runs out.
tok([], Prev, [], Prev) :- !.
tok([65533|Codes], Prev, Out, Last) :-
    !,
    warning('skipping non-utf8 character', []),
    tok(Codes, Prev, Out, Last).
tok(In, Prev, Out, Last) :-
    pattern(In-InRest, Prev, Out-OutRest, Next),
    !,
    tok(InRest, Next, OutRest, Last).
tok([32|Codes], _, [32|Out], Last) :-
    !,
    tok(Codes, [], Out, Last).
tok([Code|Codes], Prev, [Code|Out], Last) :-
    tok(Codes, [Code|Prev], Out, Last).
93
94
98
102
% pattern(+In-Rest, +Prev, -Out-OutTail, -Next)   -- whitespace rules
%
% In is the remaining input and Rest what is left after the rule; Out-OutTail
% is a difference list of emitted codes; Prev/Next are the "previous codes"
% accumulator before and after the rule.  None of these rules emits anything.

% A lone blank (32) or tab (9) that is all that is left of the input is dropped.
pattern([32]-[], Prev, Out-Out, Prev) :- !.
pattern([9]-[],  Prev, Out-Out, Prev) :- !.

% Two blanks with an empty Prev are dropped altogether.
pattern([32,32|Rest]-Rest, [], Out-Out, []) :- !.

% Any other pair of blanks/tabs, or a single tab, becomes one blank that is
% put back at the front of the remaining input.
pattern([32,32|Rest]-[32|Rest], Prev, Out-Out, Prev) :- !.
pattern([9,32|Rest]-[32|Rest],  Prev, Out-Out, Prev) :- !.
pattern([32,9|Rest]-[32|Rest],  Prev, Out-Out, Prev) :- !.
pattern([9,9|Rest]-[32|Rest],   Prev, Out-Out, Prev) :- !.
pattern([9|Rest]-[32|Rest],     Prev, Out-Out, Prev) :- !.
121
% pattern/4 -- ellipsis rules.  dots/2 recognises a run of at least two full
% stops (46), possibly separated by blanks, and returns what follows the run.
% Every rule emits exactly three full stops.

% Ellipsis followed only by blanks/tabs, Prev is an abbreviation:
% emit ". ..." directly after it.
pattern(Input-[], Prev, [46,32,46,46,46|Tail]-Tail, [46,46,46]) :-
    dots(Input, Trailing),
    end(Trailing),
    abb(Prev),
    !.
% Ellipsis followed only by blanks/tabs, otherwise.
pattern(Input-[], Prev, Out-Tail, [46,46,46]) :-
    dots(Input, Trailing),
    end(Trailing),
    !,
    insertSpace(Prev, [46,46,46|Tail], Out).

% Ellipsis directly followed by a lower-case letter: blank after the dots.
pattern(Input-[Letter|Rest], Prev, Out-Tail, []) :-
    dots(Input, [Letter|Rest]),
    lower(Letter),
    !,
    insertSpace(Prev, [46,46,46,32|Tail], Out).
% Ellipsis directly followed by an upper-case letter: newline (10) after the dots.
pattern(Input-[Letter|Rest], Prev, Out-Tail, []) :-
    dots(Input, [Letter|Rest]),
    upper(Letter),
    !,
    insertSpace(Prev, [46,46,46,10|Tail], Out).
% Ellipsis followed by anything else: blank after the dots.
pattern(Input-Rest, Prev, Out-Tail, []) :-
    dots(Input, Rest),
    !,
    insertSpace(Prev, [46,46,46,32|Tail], Out).
132
136
% pattern/4 -- full stop (46) immediately followed by a bracket or quote.
% Where a rule depends on it, the --quotes option selects between dropping
% the quote (delete) and emitting it as a separate token (keep).

% ". <bracket>" with only blanks/tabs after it.
pattern([46,Bracket|After]-[], Prev, Out-Tail, [Bracket]) :-
    bracket(Bracket),
    end(After),
    !,
    insertSpace(Prev, [46,32,Bracket|Tail], Out).

% Full stop + single quote character, only blanks/tabs after it.
pattern([46,Quote|After]-[], Prev, Out-Tail, [46]) :-
    quote(Quote),
    end(After),
    option('--quotes', delete),
    !,
    insertSpace(Prev, [46|Tail], Out).
pattern([46,Quote|After]-[], Prev, Out-Tail, [Quote]) :-
    quote(Quote),
    end(After),
    option('--quotes', keep),
    !,
    insertSpace(Prev, [46,32,Quote|Tail], Out).

% Full stop + doubled quote character (see quotes/1), only blanks/tabs after it.
pattern([46,Quote,Quote|After]-[], Prev, Out-Tail, [46]) :-
    quotes(Quote),
    end(After),
    option('--quotes', delete),
    !,
    insertSpace(Prev, [46|Tail], Out).
pattern([46,Quote,Quote|After]-[], Prev, Out-Tail, [Quote]) :-
    quotes(Quote),
    end(After),
    option('--quotes', keep),
    !,
    insertSpace(Prev, [46,32,Quote,Quote|Tail], Out).

% Full stop, blank, two different quote characters, only blanks/tabs after them.
pattern([46,32,Quote1,Quote2|After]-[], Prev, Out-Tail, [46]) :-
    quote(Quote1),
    quote(Quote2),
    Quote1 \= Quote2,
    end(After),
    option('--quotes', delete),
    !,
    insertSpace(Prev, [46|Tail], Out).
pattern([46,32,Quote1,Quote2|After]-[], Prev, Out-Tail, [Quote2]) :-
    quote(Quote1),
    quote(Quote2),
    Quote1 \= Quote2,
    end(After),
    option('--quotes', keep),
    !,
    insertSpace(Prev, [46,32,Quote1,32,Quote2|Tail], Out).

% Full stop + quote or closing bracket, blank, upper-case letter:
% a newline (10) is emitted before the upper-case letter.
pattern([46,Quote,32,Upper|Rest]-[Upper|Rest], Prev, Out-Tail, []) :-
    quote(Quote),
    upper(Upper),
    option('--quotes', delete),
    !,
    insertSpace(Prev, [46,10|Tail], Out).
pattern([46,Quote,32,Upper|Rest]-[Upper|Rest], Prev, Out-Tail, []) :-
    quote(Quote),
    upper(Upper),
    option('--quotes', keep),
    !,
    insertSpace(Prev, [46,32,Quote,10|Tail], Out).
pattern([46,Close,32,Upper|Rest]-[Upper|Rest], Prev, Out-Tail, []) :-
    closing_bracket(Close),
    upper(Upper),
    !,
    insertSpace(Prev, [46,32,Close,10|Tail], Out).
164
% pattern/4 -- full stop (46) with nothing but blanks/tabs after it.

% After a title the full stop stays attached and Next is the title itself
% (without the full stop).
pattern([46|After]-[], Prev, [46|Tail]-Tail, Prev) :-
    end(After),
    title(Prev),
    !.
% After an abbreviation the full stop stays attached and is pushed onto Next.
pattern([46|After]-[], Prev, [46|Tail]-Tail, [46|Prev]) :-
    end(After),
    abb(Prev),
    !.
% Otherwise the full stop becomes a token of its own.
pattern([46|After]-[], Prev, Out-Tail, [46]) :-
    end(After),
    !,
    insertSpace(Prev, [46|Tail], Out).
172
% pattern/4 -- full stop, blank, then a quote or opening bracket.

% ". 's" after a non-empty Prev is copied through unchanged.
pattern([46,32,Apos,115|Rest]-Rest, [_|_], [46,32,Apos,115|Tail]-Tail, [115,Apos]) :-
    rsq(Apos),
    !.
% Quote + upper-case letter: emit the full stop and a newline (10); the quote
% and the letter stay in the input for later rules.
pattern([46,32,Quote,Upper|Rest]-[Quote,Upper|Rest], Prev, Out-Tail, []) :-
    quote(Quote),
    upper(Upper),
    !,
    insertSpace(Prev, [46,10|Tail], Out).
% Same for a doubled quote character.
pattern([46,32,Quote,Quote,Upper|Rest]-[Quote,Quote,Upper|Rest], Prev, Out-Tail, []) :-
    quotes(Quote),
    upper(Upper),
    !,
    insertSpace(Prev, [46,10|Tail], Out).
% Same for an opening bracket.
pattern([46,32,Open,Upper|Rest]-[Open,Upper|Rest], Prev, Out-Tail, []) :-
    opening_bracket(Open),
    upper(Upper),
    !,
    insertSpace(Prev, [46,10|Tail], Out).
177
186
% pattern/4 -- full stop, one or two blanks, upper-case letter.
% The upper-case letter is always left in the input.  What is emitted depends
% on Prev:
%   []             -> ".<newline>"
%   one code       -> ". "      (no line break)
%   a title        -> ". "      (no line break)
%   not abb/1      -> " .<newline>"

% One blank between the full stop and the letter.
pattern([46,32,Upper|Rest]-[Upper|Rest], [], [46,10|Tail]-Tail, []) :-
    upper(Upper),
    !.
pattern([46,32,Upper|Rest]-[Upper|Rest], [_], [46,32|Tail]-Tail, []) :-
    upper(Upper),
    !.
pattern([46,32,Upper|Rest]-[Upper|Rest], Prev, [46,32|Tail]-Tail, []) :-
    upper(Upper),
    title(Prev),
    !.
pattern([46,32,Upper|Rest]-[Upper|Rest], Prev, [32,46,10|Tail]-Tail, []) :-
    upper(Upper),
    \+ abb(Prev),
    !.

% Two blanks between the full stop and the letter.
pattern([46,32,32,Upper|Rest]-[Upper|Rest], [], [46,10|Tail]-Tail, []) :-
    upper(Upper),
    !.
pattern([46,32,32,Upper|Rest]-[Upper|Rest], [_], [46,32|Tail]-Tail, []) :-
    upper(Upper),
    !.
pattern([46,32,32,Upper|Rest]-[Upper|Rest], Prev, [46,32|Tail]-Tail, []) :-
    upper(Upper),
    title(Prev),
    !.
pattern([46,32,32,Upper|Rest]-[Upper|Rest], Prev, [32,46,10|Tail]-Tail, []) :-
    upper(Upper),
    \+ abb(Prev),
    !.
197
201
% pattern/4 -- comma, brackets, colon, semicolon, question/exclamation mark.
% Except for a comma between two digits, each of these is emitted as a token
% of its own: insertSpace/3 puts a blank before it when Prev is non-empty,
% and a blank (32) is put back at the front of the remaining input.

% Comma (44) between two digits stays attached to the first digit.
pattern([Digit1,44,Digit2|Rest]-[Digit2|Rest], Prev, [Digit1,44|Tail]-Tail, [44|Prev]) :-
    num(Digit1),
    num(Digit2),
    !.
% Any other comma.
pattern([44|Rest]-[32|Rest], Prev, Out-Tail, [44]) :-
    !,
    insertSpace(Prev, [44|Tail], Out).

% Opening or closing bracket.
pattern([Bracket|Rest]-[32|Rest], Prev, Out-Tail, [Bracket]) :-
    bracket(Bracket),
    !,
    insertSpace(Prev, [Bracket|Tail], Out).

% Colon (58).
pattern([58|Rest]-[32|Rest], Prev, Out-Tail, [58]) :-
    !,
    insertSpace(Prev, [58|Tail], Out).

% Semicolon (59).
pattern([59|Rest]-[32|Rest], Prev, Out-Tail, [59]) :-
    !,
    insertSpace(Prev, [59|Tail], Out).

% Question mark or exclamation mark (see mark/1).
pattern([Mark|Rest]-[32|Rest], Prev, Out-Tail, [Mark]) :-
    mark(Mark),
    !,
    insertSpace(Prev, [Mark|Tail], Out).
228
232
% pattern/4 -- percent sign and currency symbols next to digits.

% Digit followed by '%' (37): a blank is emitted between them and another
% blank is put back in front of the remaining input.
pattern([Digit,37|Rest]-[32|Rest], _, [Digit,32,37|Tail]-Tail, [37]) :-
    num(Digit),
    !.

% '$' (36) directly before a digit: emit the symbol and a blank; the digit
% stays in the input.
pattern([36,Digit|Rest]-[Digit|Rest], _, [36,32|Tail]-Tail, [32]) :-
    num(Digit),
    !.
% Same treatment for code 128.
% NOTE(review): 128 is not a printable Unicode code point; presumably a
% legacy single-byte currency sign (euro in Windows-1252) -- confirm.
pattern([128,Digit|Rest]-[Digit|Rest], _, [128,32|Tail]-Tail, [32]) :-
    num(Digit),
    !.
241
245
% pattern/4 -- apostrophes (rsq/1) in front of two digits, and possessives.

% Apostrophe + two digits + 's' at the start of a token: copied through as
% one token.
pattern([Apos,D1,D2,115|Rest]-Rest, [], [Apos,D1,D2,115|Tail]-Tail, [115,D2,D1,Apos]) :-
    rsq(Apos),
    num(D1),
    num(D2),
    !.
% Apostrophe + two digits followed by a non-alphanumeric code: copied through.
pattern([Apos,D1,D2,Next|Rest]-[Next|Rest], [], [Apos,D1,D2|Tail]-Tail, [D2,D1,Apos]) :-
    rsq(Apos),
    num(D1),
    num(D2),
    \+ alphanum(Next),
    !.

% The remaining rules only apply when --language is en.

% <letter>'s followed by a non-alphanumeric code: blank before the apostrophe.
pattern([Letter,Apos,115,Next|Rest]-[Next|Rest], [_|_], [Letter,32,Apos,115|Tail]-Tail, [115,Apos]) :-
    option('--language', en),
    rsq(Apos),
    alpha(Letter),
    \+ alphanum(Next),
    !.
% 's directly after an abbreviation: blank before the apostrophe.
pattern([Apos,115,Next|Rest]-[Next|Rest], Prev, [32,Apos,115|Tail]-Tail, [115,Apos]) :-
    option('--language', en),
    abb(Prev),
    rsq(Apos),
    \+ alphanum(Next),
    !.
% <letter>'S (upper-case S, 83) followed by a non-alphanumeric code.
pattern([Letter,Apos,83,Next|Rest]-[Next|Rest], [_|_], [Letter,32,Apos,83|Tail]-Tail, [83,Apos]) :-
    option('--language', en),
    rsq(Apos),
    alpha(Letter),
    \+ alphanum(Next),
    !.
% 's at the start of a token: copied through unchanged.
pattern([Apos,115,Next|Rest]-[Next|Rest], [], [Apos,115|Tail]-Tail, [115,Apos]) :-
    option('--language', en),
    rsq(Apos),
    \+ alphanum(Next),
    !.
% s' followed by a non-alphanumeric code: blank between the s and the apostrophe.
pattern([115,Apos,Next|Rest]-[Next|Rest], [_|_], [115,32,Apos|Tail]-Tail, [Apos,115]) :-
    option('--language', en),
    rsq(Apos),
    \+ alphanum(Next),
    !.
262
% pattern/4 -- English contractions (only when --language is en).
% A letter followed by an apostrophe (rsq/1) and one of the endings below gets
% a blank inserted after the letter; the ending is emitted as one token.

% 'm
pattern([Letter,Apos,109|Rest]-Rest, _, [Letter,32,Apos,109|Tail]-Tail, [109,Apos]) :-
    option('--language', en),
    rsq(Apos),
    alpha(Letter),
    !.
% 'd
pattern([Letter,Apos,100|Rest]-Rest, _, [Letter,32,Apos,100|Tail]-Tail, [100,Apos]) :-
    option('--language', en),
    rsq(Apos),
    alpha(Letter),
    !.
% 'll
pattern([Letter,Apos,108,108|Rest]-Rest, _, [Letter,32,Apos,108,108|Tail]-Tail, [108,108,Apos]) :-
    option('--language', en),
    rsq(Apos),
    alpha(Letter),
    !.
% 've
pattern([Letter,Apos,118,101|Rest]-Rest, _, [Letter,32,Apos,118,101|Tail]-Tail, [101,118,Apos]) :-
    option('--language', en),
    rsq(Apos),
    alpha(Letter),
    !.
% 're
pattern([Letter,Apos,114,101|Rest]-Rest, _, [Letter,32,Apos,114,101|Tail]-Tail, [101,114,Apos]) :-
    option('--language', en),
    rsq(Apos),
    alpha(Letter),
    !.
% n't -- the blank goes before the n.
pattern([Letter,110,Apos,116|Rest]-Rest, _, [Letter,32,110,Apos,116|Tail]-Tail, [116,Apos,110]) :-
    option('--language', en),
    rsq(Apos),
    alpha(Letter),
    !.
273
% pattern/4 -- Italian elision and apostrophes inside a name.

% When --language is it: l + apostrophe + letter gets a blank after the
% apostrophe; the letter stays in the input.
% NOTE(review): insertSpace/3 also puts a blank BEFORE the l whenever Prev is
% non-empty, so an l in the middle of a word is split off -- confirm intended.
pattern([108,Apos,Letter|Rest]-[Letter|Rest], Prev, Out-Tail, []) :-
    option('--language', it),
    alpha(Letter),
    rsq(Apos),
    !,
    insertSpace(Prev, [108,Apos,32|Tail], Out).

% Letter + apostrophe + letter at the start of a token is copied through
% unchanged.  This clause carries no cut of its own; tok/4 cuts after
% calling pattern/4.
pattern([Left,Apos,Right|Rest]-Rest, [], [Left,Apos,Right|Tail]-Tail, [Right,Apos,Left]) :-
    rsq(Apos),
    alpha(Left),
    alpha(Right).
286
% pattern/4 -- quotation marks.
% With --quotes set to delete the quote characters are removed; otherwise they
% are emitted as tokens of their own, followed by a blank that is put back in
% front of the remaining input.

% Doubled quote characters (quotes/1).
pattern([32,Quote,Quote,32|Rest]-[32|Rest], Prev, Out-Out, Prev) :-
    quotes(Quote),
    option('--quotes', delete),
    !.
pattern([Quote,Quote|Rest]-Rest, Prev, Out-Out, Prev) :-
    quotes(Quote),
    option('--quotes', delete),
    !.
pattern([Quote,Quote|Rest]-[32|Rest], Prev, Out-Tail, [Quote,Quote]) :-
    quotes(Quote),
    !,
    insertSpace(Prev, [Quote,Quote|Tail], Out).

% Single quote characters (quote/1).
pattern([32,Quote,32|Rest]-[32|Rest], Prev, Out-Out, Prev) :-
    quote(Quote),
    option('--quotes', delete),
    !.
pattern([Quote|Rest]-Rest, Prev, Out-Out, Prev) :-
    quote(Quote),
    option('--quotes', delete),
    !.
pattern([Quote|Rest]-[32|Rest], Prev, Out-Tail, [Quote]) :-
    quote(Quote),
    !,
    insertSpace(Prev, [Quote|Tail], Out).
298
299
303
% Character classes over character codes (ASCII ranges only).

% alphanum(+Code): Code satisfies alpha/1 or num/1.
alphanum(Code) :-
    (   alpha(Code)
    ->  true
    ;   num(Code)
    ).

% alpha(+Code): an ASCII letter, or code 62 ('>').
alpha(62) :- !.
alpha(Code) :-
    (   upper(Code)
    ->  true
    ;   lower(Code)
    ).

% upper(+Code): 'A'..'Z'.
upper(Code) :-
    Code > 64,
    Code < 91.

% lower(+Code): 'a'..'z'.
lower(Code) :-
    Code > 96,
    Code < 123.

% num(+Code): '0'..'9'.
num(Code) :-
    Code > 47,
    Code < 58.
315
316
320
% insertSpace(+Prev, +Codes, -Spaced)
%
% Spaced is Codes itself when Prev is the empty list, and Codes preceded by a
% blank (32) otherwise.
insertSpace([], Codes, Codes) :- !.
insertSpace(_,  Codes, [32|Codes]).
323
324
328
% bracket(?Code): Code is an opening or a closing bracket
% (opening brackets are enumerated first).
bracket(Code) :-
    (   opening_bracket(Code)
    ;   closing_bracket(Code)
    ).

% opening_bracket(?Code)
opening_bracket(40).     % (
opening_bracket(91).     % [
opening_bracket(123).    % {

% closing_bracket(?Code)
closing_bracket(41).     % )
closing_bracket(93).     % ]
closing_bracket(125).    % }
340
344
% rsq(?Code): characters the apostrophe rules of pattern/4 accept.
rsq(39).      % ASCII apostrophe
rsq(8217).    % U+2019 right single quotation mark

% quote(?Code): characters treated as a quotation mark on their own.
quote(34).      % ASCII double quote
quote(39).      % ASCII apostrophe
quote(96).      % ASCII grave accent
quote(8216).    % U+2018
quote(8217).    % U+2019
quote(8218).    % U+201A
quote(8220).    % U+201C
quote(8221).    % U+201D
quote(8222).    % U+201E

% quotes(?Code): characters that form a quotation mark when written twice
% in a row.
quotes(96).      % ASCII grave accent
quotes(39).      % ASCII apostrophe
quotes(8216).    % U+2018
quotes(8217).    % U+2019
quotes(8218).    % U+201A
373
377
% mark(?Code): sentence marks that pattern/4 splits off as separate tokens.
mark(63).    % ?
mark(33).    % !
381
386
% title(+Reversed)
%
% Reversed is a code list listed in title/2 for the language selected with
% --language.  Entries are spelled backwards because tok/4 accumulates the
% previous codes most-recent-first (e.g. "rM" is "Mr").
title(Reversed) :-
    option('--language', Lang),
    title(Lang, Reversed),
    !.

% title(?Language, ?ReversedCodes)
title(en, "rM").
title(en, "srsseM").
title(en, "srM").
title(en, "semM").
title(en, "sM").
title(en, "rD").
title(en, "srD").
title(en, "forP").
title(en, "neS").
title(en, "voG").
title(en, "tS").
title(en, "peR").
title(en, "neG").
title(en, "tL").
title(en, "tueiL").
title(en, "loC").
title(en, "mdA").
title(en, "tpC").
title(en, "veR").
title(en, "noH").
title(en, "tpaC").
title(en, "rdmC").
% NOTE(review): read backwards this is "chapln" with a lower-case c, unlike
% the capitalised entries above -- confirm intended.
title(en, "nlpahc").

% Entries that are not capitalised titles but are looked up the same way.
title(en, "v").
title(en, "sv").
title(en, "eiC").
title(en, "a.k.a").
title(en, "tM").
422
% abb(+Reversed)
%
% Reversed (codes, most recent first) counts as an abbreviation when it
% contains a full stop (46) together with at least one alpha/1 code, or when
% it is listed in abb/2 for the language selected with --language.
abb(Reversed) :-
    member(46, Reversed),
    member(Code, Reversed),
    alpha(Code),
    !.
abb(Reversed) :-
    option('--language', Lang),
    abb(Lang, Reversed),
    !.

% abb(?Language, ?ReversedCodes) -- entries are spelled backwards
% (e.g. "cnI" is "Inc").
abb(en, "proC").
abb(en, "cnI").
abb(en, "oC").
abb(en, "dtL").
abb(en, "rJ").
abb(en, "rS").
abb(en, "soC").
abb(en, "sorB").
abb(en, "cte").
436
440
% mistake(?Reversed)
%
% Line endings (spelled backwards, as returned by tok/3) after which
% decideNewLine/3 writes neither a blank nor a newline.
mistake(".p.S").
442
443
447
% end(+Codes): Codes consists of nothing but blanks (32) and tabs (9);
% the empty list qualifies.
end([]) :- !.
end([32|Codes]) :- !, end(Codes).
end([9|Codes])  :- !, end(Codes).
451
452
456
% dots(+Codes, -Rest)
%
% Codes starts with a run of at least two full stops (46); blanks (32)
% around the individual stops are tolerated.  Rest is what follows the run.
dots(Codes, Rest) :-
    dots(Codes, 0, Rest).

% dots(+Codes, +Seen, -Rest): Seen counts the full stops consumed so far.
dots([32,46,32|Codes], Seen, Rest) :-      % " . "
    !,
    Seen1 is Seen + 1,
    dots(Codes, Seen1, Rest).
dots([46,32|Codes], Seen, Rest) :-         % ". "
    !,
    Seen1 is Seen + 1,
    dots(Codes, Seen1, Rest).
dots([46|Codes], Seen, Rest) :-            % "."
    !,
    Seen1 is Seen + 1,
    dots(Codes, Seen1, Rest).
dots(Rest, Seen, Rest) :-                  % run is over: need two or more
    Seen > 1.
474
475
479
% openInput(-Stream)
%
% With --stdin set to dont and --input naming an existing file, that file is
% opened for reading as UTF-8.  With --stdin set to do, the encoding flag is
% set to utf8, a warning is issued, the prompt is cleared and Stream is
% user_input.  Fails when neither case applies.
openInput(Stream) :-
    option('--stdin', dont),
    option('--input', Path),
    exists_file(Path),
    !,
    open(Path, read, Stream, [encoding(utf8)]).

openInput(Stream) :-
    option('--stdin', do),
    set_prolog_flag(encoding, utf8),
    warning('reading from standard input', []),
    prompt(_, ''),
    Stream = user_input.
492
493
497
% openOutput(-Stream)
%
% When --output is an atomic value other than user_output: open that file
% for writing as UTF-8 if access_file/2 allows it, otherwise report an error
% and fall back to user_output.  In every other case Stream is user_output.
openOutput(Stream) :-
    option('--output', Target),
    atomic(Target),
    Target \= user_output,
    (   access_file(Target, write),
        !,                                  % commit: no fallback once writable
        open(Target, write, Stream, [encoding(utf8)])
    ;   error('cannot write to specified file ~p', [Target]),
        Stream = user_output
    ),
    !.

openOutput(user_output).
508
509
513
% version/0: when --version is set to do, print the term returned by
% version/1 on user_error; otherwise do nothing.
version :-
    (   option('--version', do)
    ->  version(Release),
        format(user_error, '~p~n', [Release])
    ;   true
    ).
520
521
525
% help/0: when --help is set to do, print the usage line on user_error and
% list the options with showOptions/1.  Succeeds silently when --help is set
% to dont, and fails for any other setting.
help :-
    (   option('--help', do)
    ->  format(user_error, 'usage: tokkie [options]~n~n', []),
        showOptions(tokkie)
    ;   once(option('--help', dont))
    ).
533
534
538
% start/0 (main case): take the command-line arguments after the program
% name, install the default options for tokkie, parse the arguments, run
% tokkie/0 and halt.
start :-
    current_prolog_flag(argv, [_Program|Arguments]),
    setDefaultOptions(tokkie),
    parseOptions(tokkie, Arguments),
    tokkie,
    !,
    halt.
545
546start:-
547 error('tokkie failed',[]),
548 halt