View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2005-2022, University of Amsterdam
    7                              SWI-Prolog Solutions b.v.
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(unicode_data,
   37          [ unicode_property/2          % ?Code, ?Property
   38          ]).   39:- use_module(library(table)).   40
   41/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   42This module provides access to the   UNICODE datafile distributed by the
   43Unicode Consortium (http://www.unicode.org). This  file describes many
   44aspects of all defined UNICODE  code   positions,  such  as their name,
   45type, etc.  The meaning of the fields is defined here:
   46
   47        http://www.unicode.org/Public/UNIDATA/UCD.html#UCD_File_Format
   48
   49This library uses the table package for accessing structured files. This
   50maps the file in memory and performs  binary search. This is not blindingly
   51fast and this library should therefore   not be used for computationally
   52intensive tasks. In such cases it  can   be  used  to generate tables in
   53Prolog or even to create a dedicated C datastructure.
   54
   55The file UnicodeData.txt itself is not part   of the library and must be
   56obtained and installed separately. This is because of its size (close to
   571MB). Increasing the footprint of the environment with 1MB is too much.
   58
   59The UCD file must be  named  UnicodeData.txt   and  placed  in  the same
   60directory  as  this  file  or  in    the   search  path  'unicode'  (see
   61file_search_path/2).
   62- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 unicode_property(?Code, ?Property)
Logical predicate relating code points to properties. It is optimised for asking a single property of a known code, but works with any instantiation.
   70unicode_property(Code, Property) :-
   71    utable(Handle),
   72    property(Property),
   73    in_table(Handle, [code(Code), Property], _),
   74    \+ arg(1, Property, '').
   75
   %!  property(?Property) is nondet.
   %
   %   Enumerates the property terms accepted by unicode_property/2.
   %   Every term has exactly one argument, which receives the value
   %   of the property.  The functor names are the column names that
   %   utable/1 declares for the data file (every column except
   %   `code`), listed in the same order; the trailing comments give
   %   the field number used there.

   property(name(_)).                          % field  1
   property(general_category(_)).              % field  2
   property(canonical_combining_class(_)).     % field  3
   property(bidi_class(_)).                    % field  4
   property(decomposition_type(_)).            % field  5
   property(numeric_type_1(_)).                % field  6
   property(numeric_type_2(_)).                % field  7
   property(numeric_type_3(_)).                % field  8
   property(bidi_mirrored(_)).                 % field  9
   property(unicode_1_name(_)).                % field 10
   property(iso_comment(_)).                   % field 11
   property(simple_uppercase_mapping(_)).      % field 12
   property(simple_lowercase_mapping(_)).      % field 13
   property(simple_titlecase_mapping(_)).      % field 14
   90
   % handle/1 stores the table handle that utable/1 creates on first
   % use.  It is declared volatile as well as dynamic; in SWI-Prolog
   % the clauses of volatile predicates are not written to a saved
   % state.
   :- dynamic  handle/1.
   :- volatile handle/1.

   % Allow this file to inspect and extend the global search paths.
   :- multifile user:file_search_path/2.
   :- dynamic   user:file_search_path/2.

   % Unless a `unicode` search path has already been defined, register
   % the directory this file is being loaded from as that path, so that
   % unicode('UnicodeData.txt') is looked up next to this file.
   :- (   \+ user:file_search_path(unicode, _)
      ->  prolog_load_context(directory, Here),
          assertz(user:file_search_path(unicode, Here))
      ;   true
      ).
  107utable(Handle) :-
  108    handle(Handle),
  109    !.
  110utable(Handle) :-
  111    absolute_file_name(unicode('UnicodeData.txt'),
  112                       Path,
  113                       [ access(read)
  114                       ]),
  115    new_table(Path,
  116              [ code(hexadecimal, [sorted, unique]),
  117                name(atom, [downcase]),                             % 1
  118                general_category(atom),                             % 2
  119                canonical_combining_class(integer),                 % 3
  120                bidi_class(atom, [downcase]),                       % 4
  121                decomposition_type(atom),                           % 5
  122                numeric_type_1(integer, [syntax]),                  % 6
  123                numeric_type_2(integer, [syntax]),                  % 7
  124                numeric_type_3(integer, [syntax]),                  % 8
  125                bidi_mirrored(atom, [downcase]),                    % 9
  126                unicode_1_name(atom, [downcase]),                   % 10
  127                iso_comment(atom),                                  % 11
  128                simple_uppercase_mapping(hexadecimal, [syntax]),    % 12
  129                simple_lowercase_mapping(hexadecimal, [syntax]),    % 13
  130                simple_titlecase_mapping(hexadecimal, [syntax])     % 14
  131              ],
  132              [ field_separator(0';)
  133              ],
  134              Handle),
  135    assert(handle(Handle))