1:- module(semantria, [ process_document/2 2 , queue_document/2 3 , request_document/2 4 , request/3 5 ]). 6 7:- use_module(library(base64), [base64/2]). 8:- use_module(library(condition)). 9:- use_module(library(error), [must_be/2]). 10:- use_module(library(func)). 11:- use_module(library(http/http_header)). % needed for POST requests 12:- use_module(library(http/http_open), [http_open/3]). 13:- use_module(library(http/http_ssl_plugin)). 14:- use_module(library(http/json), [atom_json_term/3, json_read/2]). 15:- use_module(library(random), [random_between/3]). 16:- use_module(library(readutil), [read_stream_to_codes/2]). 17:- use_module(library(sha), [hash_atom/2, hmac_sha/4, sha_hash/3]). 18:- use_module(library(uri), [uri_encoded/3]). 19:- use_module(library(uri_qq)).
25:- multifile consumer_key/1. 26:- dynamic consumer_key/1.
This hack is necessary because SWI Prolog doesn't seem to have a good MD5 implementation. Run `md5sum $secret_key` at a command prompt to get the value you need.
37:- multifile secret_key_md5/1. 38:- dynamic secret_key_md5/1. 39 40 41% the base URL for all API requests to Semantria 42api_base('https://api35.semantria.com/').
54process_document(Document, Response) :- 55 document_id(Document, Id), 56 process_document_(Document, Id, 10, Response). 57 58process_document_(_, Id, Tries, Response) :- 59 handle( request_document(Id, Response0) 60 , error(_,context(_,status(404,_))) 61 , fail 62 ), 63 !, 64 Status = Response0.status, 65 ( Status == "PROCESSED" -> 66 Response = Response0 67 ; Status == "QUEUED" -> 68 poll_document(Id, Tries, Response) 69 ; Status == "FAILED" -> 70 throw("Semantria document processing failed") 71 ; % otherwise -> 72 must_be(one_of(["PROCESSED","QUEUED","FAILED"]), Status) 73 ). 74process_document_(Document, Id, Tries, Response) :- 75 queue_document(Document, Id), 76 process_document_(Document, Id, Tries, Response). 77 78poll_document(Id, Tries0, Response) :- 79 sleep(1), % give Semantria a chance to finish its work 80 Tries is Tries0 - 1, 81 ( Tries =< 0 -> throw("Too many retries") ; true ), 82 process_document_(_, Id, Tries, Response).
90queue_document(Document, Id) :- 91 % prepare arguments 92 must_be(string, Document), 93 document_id(Document, Id), 94 95 % submit document to Semantria 96 Details = _{ id: Id, text: Document }, 97 request(post(Details), document, _). 98 99document_id(_, Id) :- 100 ground(Id), 101 !. 102document_id(Document, Id) :- 103 sha_hash(Document,HashBytes,[]), 104 hash_atom(HashBytes, IdLong), 105 sub_atom(IdLong, 0, 32, _, IdShort), % 32 char max per API docs 106 atom_string(IdShort, Id).
115request_document(Id, Response) :- 116 request(get, document/Id, Response). 117 118 119%% request(+Method, +Path, Response:dict) 120% 121% Low-level predicate for making authenticated API calls. Method 122% specifies the HTTP method. Path indicates the path. It can be an 123% atom or a term (like `document/some_document_id`). For example, 124% 125% ?- request(get, status, R). 126% R = _{api_version:"3.5", ...} . 127% 128% For a POST request, make `Method=post(Dict)`. The Dict is converted 129% into a JSON object and included as the request body. 130request(Method, Path, Response) :- 131 sign_request('~w.json' $ Path, _{}, Url, Auth), 132 debug(semantria, "request URL: ~s~n", [Url]), 133 catch( request_open(Method, Url, Auth, Stream) 134 , E 135 , failable_exception(E) 136 ), 137 json_read(Stream, Json), 138 json_to_dict(Json, Response). 139 140request_open(get, Url, Auth, Stream) :- 141 http_open( Url 142 , Stream 143 , [ method(get) 144 , request_header(authorization=Auth) 145 , cert_verify_hook(ssl_verify) 146 ] 147 ), 148 set_stream(Stream, encoding(utf8)). 149request_open(post(Dict), Url, Auth, Stream) :- 150 % convert Dict to JSON 151 dict_pairs(Dict, json, Pairs0), 152 maplist(eq_dash, Pairs, Pairs0), 153 atom_json_term(Json, json(Pairs), [as(atom)]), 154 debug(semantria, "request JSON body: ~s~n", [Json]), 155 156 % POST JSON to Semantria 157 http_open( Url 158 , Stream 159 , [ post(atom(application/json, Json)) 160 , request_header(authorization=Auth) 161 , status_code(202) 162 , cert_verify_hook(ssl_verify) 163 ] 164 ). 165 166 167eq_dash(K=V,K-V). 168 169 170% accept all SSL certificates 171ssl_verify( _SSL 172 , _ProblemCertificate 173 , _AllCertificates 174 , _FirstCertificate 175 , _Error 176 ). 177 178 179% convert an exception into a signal which can either fail or rethrow. 180% this is convenient for converting predicates that throw exceptions 181% into predicates that raise signals. 182% maybe it'd be convenient to have call_signal/2 which is like call/1 183% but automatically uses this predicate to convert exceptions into 184% signals. 185failable_exception(E) :- 186 ( signal(E, Restart) -> 187 ( Restart == fail -> 188 fail 189 ; % unexpected restart -> 190 must_be(one_of([fail]), Restart) 191 ) 192 ; % signal not handled -> 193 throw(E) 194 ). 195 196 197 198json_to_dict(json(EqPairs), Dict) :- 199 !, 200 maplist(json_pair, EqPairs, DashPairs), 201 dict_pairs(Dict, _, DashPairs). 202json_to_dict(Term, Term). 203 204json_pair(Key=Value0, Key-Value) :- 205 ( atom(Value0) -> 206 atom_string(Value0, Value) 207 ; Value0=json(_) -> 208 json_to_dict(Value0, Value) 209 ; is_list(Value0) -> 210 maplist(json_to_dict, Value0, Value) 211 ; true -> 212 Value = Value0 213 ). 214 215 216% generate the URL and Authorization header that's needed for making 217% requests to Semantria. documentation for this process is available 218% at https://semantria.com/developer The written docs are somewhat poor, 219% so it's best to consult the various SDKs and their source code to 220% resolve questions. 221sign_request(Path, Params0, Url, Authorization) :- 222 % preliminaries 223 api_base(Base), 224 nonce(Nonce), 225 now(Now), 226 227 % build "Signature Base String" 228 Extra = [ oauth_consumer_key=consumer_key(~) 229 , oauth_nonce=Nonce 230 , oauth_signature_method="HMAC-SHA1" 231 , oauth_timestamp=Now 232 , oauth_version=1.0 233 ], 234 put_dict(Extra, Params0, Params), 235 Url = {|uri(Base)||$Path?$Params|}, 236 237 % build HMAC-SHA1 signature 238 secret_key_md5(Key), 239 hmac_sha(Key, uri_encode $ Url, SignatureBytes, [algorithm(sha1)]), 240 base64(atom_codes(~,SignatureBytes), Signature64), 241 uri_encode(Signature64, Signature), 242 243 % build Authorization header 244 authorization_header([ oauth_signature=Signature 245 | Extra 246 ] 247 , Authorization 248 ). 249 250 251% comma-separated Authorization header 252authorization_header(Values, Header) :- 253 maplist(quote_headerval, Values, Parts), 254 atomic_list_concat(Parts, ', ', PartialHeader), 255 Header = 'OAuth, ~s' $ PartialHeader. 256 257quote_headerval(Key=Value,Auth) :- 258 Auth = '~s="~w"' $ [Key,Value]. 259 260 261% generate a large random integer 262nonce(Nonce) :- 263 random_between(1,18_446_744_073_709_551_616,Nonce). 264 265 266% current time in integer seconds since the epoch 267now(T) :- 268 get_time(Tfloat), 269 T is round(Tfloat). 270 271 272% encode URI values as Semantria expects. 273% uri_encoded/3 doesn't encode :, / or ? characters. 274uri_encode(Value, Encoded) :- 275 uri_encoded(query_value, Value, E0), 276 atom_codes(E0, E1), 277 once(phrase(enc, E1, E2)), 278 atom_codes(Encoded, E2). 279 280enc, "%3A" --> ":", enc. 281enc, "%2F" --> "/", enc. 282enc, "%3F" --> "?", enc. 283enc, [C] --> [C], enc. 284enc --> { true }