1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2005-2022, University of Amsterdam 7 SWI-Prolog Solutions b.v. 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(unicode_data, 37 [ unicode_property/2 % ?Code, ?Property 38 ]). 39:- use_module(library(table)). 40 41/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 42This module provides access to the UNICODE datafile distributed by the 43unicode organisation (http://www.unicode.org). This file describes many 44aspects for all defined UNICODE code positions, such as their name, 45type, etc. 
The meaning of the fields is defined here:

    http://www.unicode.org/Public/UNIDATA/UCD.html#UCD_File_Format

This library uses the table package for accessing structured files. The
package maps the file in memory and performs binary search. This is not
blindingly fast and this library should therefore not be used for
computationally intensive tasks. In such cases it can be used to generate
tables in Prolog or even to create a dedicated C data structure.

The file UnicodeData.txt itself is not part of the library and must be
obtained and installed separately. This is because of its size (close to
1MB). Increasing the footprint of the environment by 1MB is too much.

The UCD file must be named UnicodeData.txt and placed in the same
directory as this file or in the search path 'unicode' (see
file_search_path/2).
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  unicode_property(?Code, ?Property) is nondet.
%
%   True when Property holds for the code point Code according to the
%   UnicodeData.txt table opened by utable/1.  Property is a term
%   Name(Value) whose Name is one of the columns enumerated by
%   property/1.  A property whose field is empty in the data file
%   (value '') is not reported.

unicode_property(Code, Property) :-
    utable(Handle),
    property(Property),
    in_table(Handle, [code(Code), Property], _),
    \+ arg(1, Property, '').            % suppress empty fields

%!  property(?Property) is nondet.
%
%   Enumerates the property terms accepted by unicode_property/2.  Each
%   name matches a column declared in the new_table/4 call of utable/1;
%   the key column code/1 is not listed.

property(name(_)).
property(general_category(_)).
property(canonical_combining_class(_)).
property(bidi_class(_)).
property(decomposition_type(_)).
property(numeric_type_1(_)).
property(numeric_type_2(_)).
property(numeric_type_3(_)).
property(bidi_mirrored(_)).
property(unicode_1_name(_)).
property(iso_comment(_)).
property(simple_uppercase_mapping(_)).
property(simple_lowercase_mapping(_)).
property(simple_titlecase_mapping(_)).

% handle/1 caches the table handle created by utable/1.  It is declared
% volatile in addition to dynamic.
:- dynamic
    handle/1.
:- volatile
    handle/1.

:- multifile
    user:file_search_path/2.
:- dynamic
    user:file_search_path/2.

% Supply a default for the `unicode` search path: unless some definition
% already exists, use the directory this file is being loaded from.
:- (   user:file_search_path(unicode, _)
   ->  true
   ;   prolog_load_context(directory, Dir),
       assertz(user:file_search_path(unicode, Dir))
   ).

%!  utable(-Handle)
%
%   Handle is the table handle for UnicodeData.txt.  The first clause
%   returns the handle cached in handle/1.  Otherwise the file is located
%   as unicode('UnicodeData.txt') with read access, a table with one
%   column per `;`-separated field is created, and its handle is cached
%   for subsequent calls.  The trailing `% N` comments give the field
%   index of each column after the code point key.

utable(Handle) :-
    handle(Handle),
    !.
utable(Handle) :-
    absolute_file_name(unicode('UnicodeData.txt'),
                       Path,
                       [ access(read)
                       ]),
    new_table(Path,
              [ code(hexadecimal, [sorted, unique]),
                name(atom, [downcase]),                          % 1
                general_category(atom),                          % 2
                canonical_combining_class(integer),              % 3
                bidi_class(atom, [downcase]),                    % 4
                decomposition_type(atom),                        % 5
                numeric_type_1(integer, [syntax]),               % 6
                numeric_type_2(integer, [syntax]),               % 7
                numeric_type_3(integer, [syntax]),               % 8
                bidi_mirrored(atom, [downcase]),                 % 9
                unicode_1_name(atom, [downcase]),                % 10
                iso_comment(atom),                               % 11
                simple_uppercase_mapping(hexadecimal, [syntax]), % 12
                simple_lowercase_mapping(hexadecimal, [syntax]), % 13
                simple_titlecase_mapping(hexadecimal, [syntax])  % 14
              ],
              [ field_separator(0';)
              ],
              Handle),
    assertz(handle(Handle)).