| /* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl> |
| * |
| * Permission to use, copy, modify, and/or distribute this software for any |
| * purpose with or without fee is hereby granted, provided that the above |
| * copyright notice and this permission notice appear in all copies. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| */ |
| |
| /* Derived from https://github.com/bnoordhuis/punycode |
| * but updated to support IDNA 2008. |
| */ |
| |
| #include "uv.h" |
| #include "idna.h" |
| #include <string.h> |
| |
| static unsigned uv__utf8_decode1_slow(const char** p, |
| const char* pe, |
| unsigned a) { |
| unsigned b; |
| unsigned c; |
| unsigned d; |
| unsigned min; |
| |
| if (a > 0xF7) |
| return -1; |
| |
| switch (*p - pe) { |
| default: |
| if (a > 0xEF) { |
| min = 0x10000; |
| a = a & 7; |
| b = (unsigned char) *(*p)++; |
| c = (unsigned char) *(*p)++; |
| d = (unsigned char) *(*p)++; |
| break; |
| } |
| /* Fall through. */ |
| case 2: |
| if (a > 0xDF) { |
| min = 0x800; |
| b = 0x80 | (a & 15); |
| c = (unsigned char) *(*p)++; |
| d = (unsigned char) *(*p)++; |
| a = 0; |
| break; |
| } |
| /* Fall through. */ |
| case 1: |
| if (a > 0xBF) { |
| min = 0x80; |
| b = 0x80; |
| c = 0x80 | (a & 31); |
| d = (unsigned char) *(*p)++; |
| a = 0; |
| break; |
| } |
| return -1; /* Invalid continuation byte. */ |
| } |
| |
| if (0x80 != (0xC0 & (b ^ c ^ d))) |
| return -1; /* Invalid sequence. */ |
| |
| b &= 63; |
| c &= 63; |
| d &= 63; |
| a = (a << 18) | (b << 12) | (c << 6) | d; |
| |
| if (a < min) |
| return -1; /* Overlong sequence. */ |
| |
| if (a > 0x10FFFF) |
| return -1; /* Four-byte sequence > U+10FFFF. */ |
| |
| if (a >= 0xD800 && a <= 0xDFFF) |
| return -1; /* Surrogate pair. */ |
| |
| return a; |
| } |
| |
| unsigned uv__utf8_decode1(const char** p, const char* pe) { |
| unsigned a; |
| |
| a = (unsigned char) *(*p)++; |
| |
| if (a < 128) |
| return a; /* ASCII, common case. */ |
| |
| return uv__utf8_decode1_slow(p, pe, a); |
| } |
| |
| #define foreach_codepoint(c, p, pe) \ |
| for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;) |
| |
| static int uv__idna_toascii_label(const char* s, const char* se, |
| char** d, char* de) { |
| static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789"; |
| const char* ss; |
| unsigned c; |
| unsigned h; |
| unsigned k; |
| unsigned n; |
| unsigned m; |
| unsigned q; |
| unsigned t; |
| unsigned x; |
| unsigned y; |
| unsigned bias; |
| unsigned delta; |
| unsigned todo; |
| int first; |
| |
| h = 0; |
| ss = s; |
| todo = 0; |
| |
| foreach_codepoint(c, &s, se) { |
| if (c < 128) |
| h++; |
| else if (c == (unsigned) -1) |
| return UV_EINVAL; |
| else |
| todo++; |
| } |
| |
| if (todo > 0) { |
| if (*d < de) *(*d)++ = 'x'; |
| if (*d < de) *(*d)++ = 'n'; |
| if (*d < de) *(*d)++ = '-'; |
| if (*d < de) *(*d)++ = '-'; |
| } |
| |
| x = 0; |
| s = ss; |
| foreach_codepoint(c, &s, se) { |
| if (c > 127) |
| continue; |
| |
| if (*d < de) |
| *(*d)++ = c; |
| |
| if (++x == h) |
| break; /* Visited all ASCII characters. */ |
| } |
| |
| if (todo == 0) |
| return h; |
| |
| /* Only write separator when we've written ASCII characters first. */ |
| if (h > 0) |
| if (*d < de) |
| *(*d)++ = '-'; |
| |
| n = 128; |
| bias = 72; |
| delta = 0; |
| first = 1; |
| |
| while (todo > 0) { |
| m = -1; |
| s = ss; |
| foreach_codepoint(c, &s, se) |
| if (c >= n) |
| if (c < m) |
| m = c; |
| |
| x = m - n; |
| y = h + 1; |
| |
| if (x > ~delta / y) |
| return UV_E2BIG; /* Overflow. */ |
| |
| delta += x * y; |
| n = m; |
| |
| s = ss; |
| foreach_codepoint(c, &s, se) { |
| if (c < n) |
| if (++delta == 0) |
| return UV_E2BIG; /* Overflow. */ |
| |
| if (c != n) |
| continue; |
| |
| for (k = 36, q = delta; /* empty */; k += 36) { |
| t = 1; |
| |
| if (k > bias) |
| t = k - bias; |
| |
| if (t > 26) |
| t = 26; |
| |
| if (q < t) |
| break; |
| |
| /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore |
| * 10 <= y <= 35, we can optimize the long division |
| * into a table-based reciprocal multiplication. |
| */ |
| x = q - t; |
| y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */ |
| q = x / y; |
| t = t + x % y; /* 1 <= t <= 35 because of y. */ |
| |
| if (*d < de) |
| *(*d)++ = alphabet[t]; |
| } |
| |
| if (*d < de) |
| *(*d)++ = alphabet[q]; |
| |
| delta /= 2; |
| |
| if (first) { |
| delta /= 350; |
| first = 0; |
| } |
| |
| /* No overflow check is needed because |delta| was just |
| * divided by 2 and |delta+delta >= delta + delta/h|. |
| */ |
| h++; |
| delta += delta / h; |
| |
| for (bias = 0; delta > 35 * 26 / 2; bias += 36) |
| delta /= 35; |
| |
| bias += 36 * delta / (delta + 38); |
| delta = 0; |
| todo--; |
| } |
| |
| delta++; |
| n++; |
| } |
| |
| return 0; |
| } |
| |
| #undef foreach_codepoint |
| |
| long uv__idna_toascii(const char* s, const char* se, char* d, char* de) { |
| const char* si; |
| const char* st; |
| unsigned c; |
| char* ds; |
| int rc; |
| |
| ds = d; |
| |
| for (si = s; si < se; /* empty */) { |
| st = si; |
| c = uv__utf8_decode1(&si, se); |
| |
| if (c != '.') |
| if (c != 0x3002) /* 。 */ |
| if (c != 0xFF0E) /* . */ |
| if (c != 0xFF61) /* 。 */ |
| continue; |
| |
| rc = uv__idna_toascii_label(s, st, &d, de); |
| |
| if (rc < 0) |
| return rc; |
| |
| if (d < de) |
| *d++ = '.'; |
| |
| s = si; |
| } |
| |
| if (s < se) { |
| rc = uv__idna_toascii_label(s, se, &d, de); |
| |
| if (rc < 0) |
| return rc; |
| } |
| |
| if (d < de) |
| *d++ = '\0'; |
| |
| return d - ds; /* Number of bytes written. */ |
| } |