| #!/usr/bin/env python |
| # |
| # Copyright 2011-2016 The Rust Project Developers. See the COPYRIGHT |
| # file at the top-level directory of this distribution and at |
| # http://rust-lang.org/COPYRIGHT. |
| # |
| # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| # option. This file may not be copied, modified, or distributed |
| # except according to those terms. |
| |
| # This script uses the following Unicode tables: |
| # - Categories.txt |
| |
| import os |
| import subprocess |
| |
def to_ranges(iter):
    """Collapse a stream of integers into half-open ``(start, end)`` runs.

    A value extends the current run only when it equals the run's current
    ``end``. A new run is always started at 0x10000 and 0x20000, so a run of
    consecutive values never straddles either of those boundaries.

    Yields one ``(start, end)`` tuple per run, in input order; yields nothing
    for an empty input.
    """
    forced_breaks = (0x10000, 0x20000)
    start = None
    end = None
    for value in iter:
        continues_run = (
            start is not None
            and value == end
            and value not in forced_breaks
        )
        if continues_run:
            end += 1
            continue
        # The value does not extend the open run: flush it (if any) and
        # begin a fresh single-element run.
        if start is not None:
            yield (start, end)
        start, end = value, value + 1
    if start is not None:
        yield (start, end)
| |
def get_escaped(dictionary):
    """Yield, in ascending order, every code point that is to be escaped.

    ``dictionary`` maps a code point (int) to its category string; a code
    point missing from it is treated as "Cn". A code point is yielded when
    its category is one of Cc, Cf, Cs, Co, Cn, Zl, Zp or Zs, except that the
    ASCII space character is never yielded.
    """
    escaped_categories = "Cc Cf Cs Co Cn Zl Zp Zs".split()
    space = ord(' ')
    for codepoint in range(0x110000):
        category = dictionary.get(codepoint, "Cn")
        if category not in escaped_categories:
            continue
        if codepoint == space:
            continue
        yield codepoint
| |
def get_file(f):
    """Open the local copy of the file named by URL ``f``, fetching it if absent.

    The local name is the basename of ``f``, resolved against the current
    working directory. If opening it raises FileNotFoundError, ``curl -O`` is
    run on ``f`` (a non-zero exit raises, because ``check=True``) and the
    open is retried.

    Returns the open file object; the caller is responsible for closing it.
    """
    local_name = os.path.basename(f)
    try:
        handle = open(local_name)
    except FileNotFoundError:
        subprocess.run(["curl", "-O", f], check=True)
        handle = open(local_name)
    return handle
| |
def main():
    """Print the generated Rust source for `is_printable` to stdout.

    Steps:
      1. Read Categories.txt (obtained through get_file) into a mapping of
         code point -> category, taking the first whitespace-separated field
         of each line as a hexadecimal code point and the second as its
         category.
      2. Group the escaped code points (get_escaped) into runs (to_ranges)
         and sort each run into one of five tables:
           - singletons0 / normal0: runs starting below 0x10000;
           - singletons1 / normal1: runs starting in 0x10000..0x1FFFF,
             stored with the 0x10000 bit cleared so they fit in a u16;
           - extra: runs starting at 0x20000 or above, stored as
             (start, length) and emitted as explicit range checks.
         Runs of length one or two are stored as individual singletons;
         longer runs are stored as (start, length) pairs.
      3. Print the Rust code followed by the four u16 tables.
    """
    # Parse the table and close the file as soon as it has been consumed.
    with get_file("http://www.unicode.org/notes/tn36/Categories.txt") as table:
        dictionary = {
            int(fields[0], 16): fields[1]
            for fields in (line.split() for line in table)
        }

    CUTOFF = 0x10000
    singletons0 = []
    singletons1 = []
    normal0 = []
    normal1 = []
    extra = []

    for a, b in to_ranges(get_escaped(dictionary)):
        if a >= 2 * CUTOFF:
            # Everything from 0x20000 upwards goes to `extra`, including a
            # short run starting exactly at 0x20000: its CUTOFF bit is clear,
            # so it would otherwise land in singletons0 with a value that
            # does not fit in a u16.
            extra.append((a, b - a))
        elif a == b - 1:
            if a & CUTOFF:
                singletons1.append(a & ~CUTOFF)
            else:
                singletons0.append(a)
        elif a == b - 2:
            if a & CUTOFF:
                singletons1.append(a & ~CUTOFF)
                singletons1.append((a + 1) & ~CUTOFF)
            else:
                singletons0.append(a)
                singletons0.append(a + 1)
        elif a & CUTOFF:
            normal1.append((a & ~CUTOFF, b - a))
        else:
            normal0.append((a, b - a))

    # NOTE(review): the emitted Rust text below carries no indentation in
    # this copy of the script; confirm against the upstream template before
    # regenerating a checked-in file.
    print("""\
// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// NOTE: The following code was generated by "src/etc/char_private.py",
// do not edit directly!

use slice::SliceExt;

fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool {
for &s in singletons {
if x == s {
return false;
} else if x < s {
break;
}
}
for w in normal.chunks(2) {
let start = w[0];
let len = w[1];
let difference = (x as i32) - (start as i32);
if 0 <= difference {
if difference < len as i32 {
return false;
}
} else {
break;
}
}
true
}

pub fn is_printable(x: char) -> bool {
let x = x as u32;
let lower = x as u16;
if x < 0x10000 {
check(lower, SINGLETONS0, NORMAL0)
} else if x < 0x20000 {
check(lower, SINGLETONS1, NORMAL1)
} else {\
""")
    # One explicit half-open range test per run at or above 0x20000.
    for a, b in extra:
        print(" if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b))
        print(" return false;")
        print(" }")
    print("""\
true
}
}\
""")
    print()
    print("const SINGLETONS0: &'static [u16] = &[")
    for s in singletons0:
        print(" 0x{:x},".format(s))
    print("];")
    print("const SINGLETONS1: &'static [u16] = &[")
    for s in singletons1:
        print(" 0x{:x},".format(s))
    print("];")
    # The NORMAL tables are flat (start, length) pairs, read back two at a
    # time by the generated `check` function.
    print("const NORMAL0: &'static [u16] = &[")
    for a, b in normal0:
        print(" 0x{:x}, 0x{:x},".format(a, b))
    print("];")
    print("const NORMAL1: &'static [u16] = &[")
    for a, b in normal1:
        print(" 0x{:x}, 0x{:x},".format(a, b))
    print("];")
| |
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()