| /*************************************************************************/ |
| /* */ |
| /* Language Technologies Institute */ |
| /* Carnegie Mellon University */ |
| /* Copyright (c) 1999 */ |
| /* All Rights Reserved. */ |
| /* */ |
| /* Permission is hereby granted, free of charge, to use and distribute */ |
| /* this software and its documentation without restriction, including */ |
| /* without limitation the rights to use, copy, modify, merge, publish, */ |
| /* distribute, sublicense, and/or sell copies of this work, and to */ |
| /* permit persons to whom this work is furnished to do so, subject to */ |
| /* the following conditions: */ |
| /* 1. The code must retain the above copyright notice, this list of */ |
| /* conditions and the following disclaimer. */ |
| /* 2. Any modifications must be clearly marked as such. */ |
| /* 3. Original authors' names are not deleted. */ |
| /* 4. The authors' names are not used to endorse or promote products */ |
| /* derived from this software without specific prior written */ |
| /* permission. */ |
| /* */ |
| /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ |
| /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ |
| /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ |
| /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ |
| /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ |
| /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ |
| /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ |
| /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ |
| /* THIS SOFTWARE. */ |
| /* */ |
| /*************************************************************************/ |
| /* Author: Alan W Black (awb@cs.cmu.edu) */ |
| /* Date: December 1999 */ |
| /*************************************************************************/ |
| /* */ |
| /* Typed values */ |
| /* */ |
| /*************************************************************************/ |
| #include "cst_math.h" |
| #include "cst_file.h" |
| #include "cst_val.h" |
| #include "cst_string.h" |
| #include "cst_tokenstream.h" |
| |
| static cst_val *new_val() |
| { |
| return cst_alloc(struct cst_val_struct,1); |
| } |
| |
| cst_val *int_val(int i) |
| { |
| cst_val *v = new_val(); |
| CST_VAL_TYPE(v) = CST_VAL_TYPE_INT; |
| CST_VAL_INT(v) = i; |
| return v; |
| } |
| |
| cst_val *float_val(float f) |
| { |
| cst_val *v = new_val(); |
| CST_VAL_TYPE(v) = CST_VAL_TYPE_FLOAT; |
| CST_VAL_FLOAT(v) = f; |
| return v; |
| } |
| |
| cst_val *string_val(const char *s) |
| { |
| cst_val *v = new_val(); |
| CST_VAL_TYPE(v) = CST_VAL_TYPE_STRING; |
| /* would be nice to note if this is a deletable string or not */ |
| CST_VAL_STRING_LVAL(v) = cst_strdup(s); |
| return v; |
| } |
| |
| cst_val *cons_val(const cst_val *a, const cst_val *b) |
| { |
| cst_val *v = new_val(); |
| CST_VAL_CAR(v)=((!a || cst_val_consp(a)) ? |
| (cst_val *)(void *)a:val_inc_refcount(a)); |
| CST_VAL_CDR(v)=((!b || cst_val_consp(b)) ? |
| (cst_val *)(void *)b:val_inc_refcount(b)); |
| return v; |
| } |
| |
| cst_val *val_new_typed(int type,void *vv) |
| { |
| cst_val *v = new_val(); |
| CST_VAL_TYPE(v) = type; |
| CST_VAL_VOID(v) = vv; |
| return v; |
| } |
| |
| void delete_val_list(cst_val *v) |
| { |
| if (v) |
| { |
| if (cst_val_consp(v)) |
| { |
| delete_val_list(CST_VAL_CDR(v)); |
| cst_free(v); |
| } |
| else |
| delete_val(v); |
| } |
| } |
| |
| void delete_val(cst_val *v) |
| { |
| if (v) |
| { |
| if (cst_val_consp(v)) |
| { |
| delete_val(CST_VAL_CAR(v)); |
| delete_val(CST_VAL_CDR(v)); |
| cst_free(v); |
| } |
| else if (val_dec_refcount(v) == 0) |
| { |
| if (CST_VAL_TYPE(v) == CST_VAL_TYPE_STRING) |
| cst_free(CST_VAL_VOID(v)); |
| else if (CST_VAL_TYPE(v) >= CST_VAL_TYPE_FIRST_FREE) |
| { |
| if (cst_val_defs[CST_VAL_TYPE(v)/2].delete_function) |
| (cst_val_defs[CST_VAL_TYPE(v)/2].delete_function) |
| (CST_VAL_VOID(v)); |
| } |
| cst_free(v); |
| } |
| } |
| } |
| |
| /* Accessor functions */ |
| int val_int(const cst_val *v) |
| { |
| if (v && (CST_VAL_TYPE(v) == CST_VAL_TYPE_INT)) |
| return CST_VAL_INT(v); |
| else if (v && (CST_VAL_TYPE(v) == CST_VAL_TYPE_FLOAT)) |
| return (int)CST_VAL_FLOAT(v); |
| else if (v && (CST_VAL_TYPE(v) == CST_VAL_TYPE_STRING)) |
| return atoi(CST_VAL_STRING(v)); |
| else |
| { |
| cst_errmsg("VAL: tried to access int in %d typed val\n", |
| (v ? CST_VAL_TYPE(v) : -1)); |
| cst_error(); |
| } |
| return 0; |
| } |
| |
| float val_float(const cst_val *v) |
| { |
| if (v && (CST_VAL_TYPE(v) == CST_VAL_TYPE_INT)) |
| return (float)CST_VAL_INT(v); |
| else if (v && (CST_VAL_TYPE(v) == CST_VAL_TYPE_FLOAT)) |
| return CST_VAL_FLOAT(v); |
| else if (v && (CST_VAL_TYPE(v) == CST_VAL_TYPE_STRING)) |
| return cst_atof(CST_VAL_STRING(v)); |
| else |
| { |
| cst_errmsg("VAL: tried to access float in %d typed val\n", |
| (v ? CST_VAL_TYPE(v) : -1)); |
| cst_error(); |
| } |
| return 0; |
| } |
| |
| const char *val_string(const cst_val *v) |
| { |
| if (v && (CST_VAL_TYPE(v) == CST_VAL_TYPE_STRING)) |
| return CST_VAL_STRING(v); |
| else |
| { |
| cst_errmsg("VAL: tried to access string in %d typed val\n", |
| (v ? CST_VAL_TYPE(v) : -1)); |
| cst_error(); |
| } |
| return 0; |
| } |
| |
| const cst_val *val_car(const cst_val *v) |
| { |
| if (v && cst_val_consp(v)) |
| return CST_VAL_CAR(v); |
| else |
| { |
| cst_errmsg("VAL: tried to access car in %d typed val\n", |
| (v ? CST_VAL_TYPE(v) : -1)); |
| cst_error(); |
| } |
| return 0; |
| } |
| |
| const cst_val *val_cdr(const cst_val *v) |
| { |
| if (v && cst_val_consp(v)) |
| return CST_VAL_CDR(v); |
| else |
| { |
| cst_errmsg("VAL: tried to access cdr in %d typed val\n", |
| (v ? CST_VAL_TYPE(v) : -1)); |
| cst_error(); |
| } |
| return 0; |
| } |
| |
| void *val_generic(const cst_val *v, int type, const char *stype) |
| { /* a generic access function that checks the expected type */ |
| if (v && CST_VAL_TYPE(v) == type) |
| return CST_VAL_VOID(v); |
| else |
| { |
| cst_errmsg("VAL: tried to access %s in %d type val\n", |
| stype, |
| (v ? CST_VAL_TYPE(v) : -1)); |
| cst_error(); |
| } |
| return NULL; |
| } |
| |
| void *val_void(const cst_val *v) |
| { |
| /* The scary, do anything function, this shouldn't be called by mortals */ |
| if ((v == NULL) || |
| (CST_VAL_TYPE(v) == CST_VAL_TYPE_CONS) || |
| (CST_VAL_TYPE(v) == CST_VAL_TYPE_INT) || |
| (CST_VAL_TYPE(v) == CST_VAL_TYPE_FLOAT)) |
| { |
| cst_errmsg("VAL: tried to access void in %d typed val\n", |
| (v ? CST_VAL_TYPE(v) : -1)); |
| cst_error(); |
| return NULL; |
| } |
| else |
| return CST_VAL_VOID(v); |
| } |
| |
| int cst_val_consp(const cst_val *v) |
| { |
| /* To keep a val cell down to 8 bytes we identify non-cons cells */ |
| /* with non-zero values in the least significant bit of the first */ |
| /* address in the cell (this is a standard technique used on Lisp */ |
| /* machines) */ |
| #if 0 |
| void *t; |
| int t1; |
| |
| /* Hmm this still isn't right (it can be) but this isn't it */ |
| t = CST_VAL_CAR(v); |
| t1 = *(int *)&t; |
| |
| if ((t1&0x1) == 0) |
| return TRUE; |
| else |
| return FALSE; |
| #endif |
| const cst_val_atom *t; |
| |
| t = (const cst_val_atom *)v; |
| if (t->type % 2 == 0) |
| return TRUE; |
| else |
| return FALSE; |
| } |
| |
| const cst_val *set_cdr(cst_val *v1, const cst_val *v2) |
| { |
| /* destructive set cdr, be careful you have a pointer to current cdr */ |
| |
| if (!cst_val_consp(v1)) |
| { |
| cst_errmsg("VAL: tried to set cdr of non-consp cell\n"); |
| cst_error(); |
| return NULL; |
| } |
| else |
| { |
| if (CST_VAL_CDR(v1)) |
| { |
| val_dec_refcount(CST_VAL_CDR(v1)); |
| val_inc_refcount(v1); |
| } |
| CST_VAL_CDR(v1) = (cst_val *)v2; |
| } |
| return v1; |
| } |
| |
| const cst_val *set_car(cst_val *v1, const cst_val *v2) |
| { |
| /* destructive set car, be careful you have a pointer to current car */ |
| |
| if (!cst_val_consp(v1)) |
| { |
| cst_errmsg("VAL: tried to set car of non-consp cell\n"); |
| cst_error(); |
| return NULL; |
| } |
| else |
| { |
| val_dec_refcount(CST_VAL_CAR(v1)); |
| val_inc_refcount(v1); |
| CST_VAL_CAR(v1) = (cst_val *)v2; |
| } |
| return v1; |
| } |
| |
| void val_print(cst_file fd,const cst_val *v) |
| { |
| const cst_val *p; |
| |
| if (v == NULL) |
| cst_fprintf(fd,"[null]"); |
| else if (CST_VAL_TYPE(v) == CST_VAL_TYPE_INT) |
| cst_fprintf(fd,"%d",val_int(v)); |
| else if (CST_VAL_TYPE(v) == CST_VAL_TYPE_FLOAT) |
| cst_fprintf(fd,"%f",val_float(v)); |
| else if (CST_VAL_TYPE(v) == CST_VAL_TYPE_STRING) |
| cst_fprintf(fd,"%s",val_string(v)); |
| else if (cst_val_consp(v)) |
| { |
| cst_fprintf(fd,"("); |
| for (p=v; p; ) |
| { |
| val_print(fd,val_car(p)); |
| p=val_cdr(p); |
| if (p) |
| cst_fprintf(fd," "); |
| if (p && !cst_val_consp(p)) /* dotted pairs for non-list */ |
| { |
| cst_fprintf(fd,". "); |
| val_print(fd,p); |
| break; |
| } |
| } |
| cst_fprintf(fd,")"); |
| } |
| else |
| cst_fprintf(fd,"[Val %s 0x%p]", |
| cst_val_defs[CST_VAL_TYPE(v)/2].name,CST_VAL_VOID(v)); |
| } |
| |
| cst_val *val_reverse(cst_val *l) |
| { /* destructively reverse the list */ |
| cst_val *n,*np,*nl; |
| for (nl=0,n=l; n; nl=n,n=np) |
| { |
| np=CST_VAL_CDR(n); |
| CST_VAL_CDR(n) = nl; |
| } |
| return nl; |
| } |
| |
| cst_val *val_append(cst_val *l1, cst_val *l2) |
| { |
| /* Destructively add l2 to the end of l1 return l1 */ |
| cst_val *t; |
| |
| if (l1 == 0) |
| return l2; |
| else |
| { |
| for (t=l1; val_cdr(t); t=CST_VAL_CDR(t)); |
| CST_VAL_CDR(t) = l2; |
| return l1; |
| } |
| } |
| |
| int val_length(const cst_val *l) |
| { |
| const cst_val *n; |
| int i; |
| |
| for (i=0,n=l; n; n=val_cdr(n)) |
| i++; |
| |
| return i; |
| } |
| |
| int val_equal(const cst_val *v1, const cst_val *v2) |
| { |
| if (v1 == v2) |
| return TRUE; /* its eq so its equal */ |
| else if (v1 == 0) |
| return FALSE; |
| else if (CST_VAL_TYPE(v1) == CST_VAL_TYPE(v2)) |
| { |
| if (cst_val_consp(v1)) |
| return ((val_equal(val_car(v1),val_car(v2))) && |
| (val_equal(val_cdr(v1),val_cdr(v2)))); |
| else if (CST_VAL_TYPE(v1) == CST_VAL_TYPE_INT) |
| return (val_int(v1) == val_int(v2)); |
| else if (CST_VAL_TYPE(v1) == CST_VAL_TYPE_FLOAT) |
| return (val_float(v1) == val_float(v2)); |
| else if (CST_VAL_TYPE(v1) == CST_VAL_TYPE_STRING) |
| return (cst_streq(CST_VAL_STRING(v1),CST_VAL_STRING(v2))); |
| else |
| return CST_VAL_VOID(v1) == CST_VAL_VOID(v2); |
| } |
| else |
| return FALSE; |
| } |
| |
| int val_less(const cst_val *v1, const cst_val *v2) |
| { |
| return val_float(v1) < val_float(v2); |
| } |
| |
| int val_greater(const cst_val *v1,const cst_val *v2) |
| { |
| return val_float(v1) > val_float(v2); |
| } |
| |
| int val_member(const cst_val *v1,const cst_val *l) |
| { |
| const cst_val *i; |
| |
| for (i=l; i; i=val_cdr(i)) |
| { |
| if (val_equal(val_car(i),v1)) |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| int val_member_string(const char *v1,const cst_val *l) |
| { |
| const cst_val *i; |
| |
| for (i=l; i; i=val_cdr(i)) |
| { |
| if (cst_streq(v1,val_string(val_car(i)))) |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| cst_val *val_inc_refcount(const cst_val *b) |
| { |
| cst_val *wb; |
| |
| /* Well I was lying, they're not really const, but this is the place */ |
| /* where breaking const is reasonable */ |
| wb = (cst_val *)(void *)b; |
| |
| if (CST_VAL_REFCOUNT(wb) == -1) |
| /* or is a cons cell in the text segment, how do I do that ? */ |
| return wb; |
| else if (!cst_val_consp(wb)) /* we don't ref count cons cells */ |
| CST_VAL_REFCOUNT(wb) += 1; |
| return wb; |
| } |
| |
| int val_dec_refcount(const cst_val *b) |
| { |
| cst_val *wb; |
| |
| wb = (cst_val *)(void *)b; |
| |
| if (CST_VAL_REFCOUNT(wb) == -1) |
| /* or is a cons cell in the text segment, how do I do that ? */ |
| return -1; |
| else if (cst_val_consp(wb)) /* we don't ref count cons cells */ |
| return 0; |
| else if (CST_VAL_REFCOUNT(wb) == 0) |
| { |
| /* Otherwise, trying to free a val outside an |
| item/relation/etc has rather the opposite effect from what |
| you might have intended... */ |
| return 0; |
| } |
| else |
| { |
| CST_VAL_REFCOUNT(wb) -= 1; |
| return CST_VAL_REFCOUNT(wb); |
| } |
| } |
| |
| int utf8_sequence_length(char c0) |
| { |
| // Get the expected length of UTF8 sequence given its most |
| // significant byte |
| return (( 0xE5000000 >> (( c0 >> 3 ) & 0x1E )) & 3 ) + 1; |
| } |
| |
| cst_val *cst_utf8_explode(const cst_string *utf8string) |
| { |
| // Return a list of utf8 characters as strings |
| cst_val *chars=NULL; |
| |
| const unsigned char *str = (const unsigned char*)utf8string; |
| char utf8char[5]; |
| char c0; |
| int charlength; |
| |
| while ((c0 = *str)) |
| { |
| charlength = utf8_sequence_length(c0); |
| snprintf(utf8char, charlength + 1, "%s", str); |
| chars = cons_val(string_val(utf8char),chars); |
| str += charlength; |
| } |
| return val_reverse(chars); |
| } |
| |
| static int utf8_ord(const char *utf8_seq) { |
| unsigned int len; |
| int ord; |
| |
| unsigned char c0, c1, c2, c3; // Potential bytes in the UTF8 symbol |
| c0 = utf8_seq[0]; |
| len = utf8_sequence_length(c0); |
| |
| // Make sure the string sequence we received matches with the |
| // expected length, and that the expected length is nonzero. |
| if ( (len == 0) || |
| (len != strlen(utf8_seq))) { |
| return -1; |
| } |
| |
| if (len == 1) { |
| // ASCII sequence. |
| return c0; |
| } |
| |
| c1 = utf8_seq[1]; |
| if (len == 2) { |
| ord = ((c0 & 0x1F) << 6) | (c1 & 0x3F); |
| if (ord < 0x80) |
| return -1; |
| return ord; |
| } |
| |
| c2 = utf8_seq[2]; |
| if (len == 3) { |
| if ((c2 & 0xC0) != 0x80) |
| return -1; |
| ord = ((c0 & 0x0F) << 12) | ((c1 & 0x3F) << 6) | (c2 & 0x3F); |
| if (ord < 0x800 || |
| (ord >= 0xD800 && ord <= 0xDFFF)) |
| return -1; |
| return ord; |
| } |
| |
| c3 = utf8_seq[3]; |
| if (len == 4) { |
| if ((c3 & 0xC0) != 0x80) |
| return -1; |
| ord = |
| ((c0 & 0x7) << 18) | ((c1 & 0x3F) << 12) | |
| ((c2 & 0x3F) << 6) | (c3 & 0x3F); |
| if (ord < 0x10000 || ord > 0x10FFFF) |
| return -1; |
| return ord; |
| } |
| |
| return -1; |
| } |
| |
| cst_val *cst_utf8_ord(const cst_val *utf8_char) { |
| const char *ch=(const char *)val_string(utf8_char); |
| return int_val(utf8_ord(ch)); |
| } |
| int cst_utf8_ord_string(const char *utf8_char) |
| { |
| return utf8_ord(utf8_char); |
| } |
| |
| static int utf8_chr(int ord, char* utf8char) { |
| unsigned int utf8len; |
| int i = 0; |
| |
| if (ord < 0x80) { |
| utf8len = 1; |
| } else if (ord < 0x800) { |
| utf8len = 2; |
| } else if (ord <= 0xFFFF) { |
| utf8len = 3; |
| } else if (ord <= 0x200000) { |
| utf8len = 4; |
| } else { |
| // Replace invalid character with FFFD |
| utf8len = 2; |
| ord = 0xFFFD; |
| } |
| |
| i = utf8len; // Index into utf8char |
| utf8char[i--] = 0; |
| |
| switch (utf8len) { |
| // These fallthrough deliberately |
| case 6: |
| utf8char[i--] = (ord | 0x80) & 0xBF; |
| ord >>= 6; |
| case 5: |
| utf8char[i--] = (ord | 0x80) & 0xBF; |
| ord >>= 6; |
| case 4: |
| utf8char[i--] = (ord | 0x80) & 0xBF; |
| ord >>= 6; |
| case 3: |
| utf8char[i--] = (ord | 0x80) & 0xBF; |
| ord >>= 6; |
| case 2: |
| utf8char[i--] = (ord | 0x80) & 0xBF; |
| ord >>= 6; |
| case 1: |
| switch (utf8len) { |
| case 0: |
| case 1: |
| utf8char[i--] = ord; |
| break; |
| case 2: |
| utf8char[i--] = ord | 0xC0; |
| break; |
| case 3: |
| utf8char[i--] = ord | 0xE0; |
| break; |
| case 4: |
| utf8char[i--] = ord | 0xF0; |
| } |
| } |
| return utf8len; |
| } |
| |
| cst_val *cst_utf8_chr(const cst_val *ord) { |
| char ch[5]; |
| int utf8len; |
| |
| utf8len = utf8_chr(val_int(ord),ch); |
| if (utf8len == 0) { |
| return 0; |
| } |
| |
| return string_val(ch); |
| } |
| |
| int val_stringp(const cst_val *v) |
| { |
| if (cst_val_consp(v)) |
| return FALSE; |
| else if (CST_VAL_TYPE(v) == CST_VAL_TYPE_STRING) |
| return TRUE; |
| else |
| return FALSE; |
| } |
| |
| const cst_val *val_assoc_string(const char *v1,const cst_val *al) |
| { |
| const cst_val *i; |
| |
| for (i=al; i; i=val_cdr(i)) |
| { |
| if (cst_streq(v1,val_string(val_car(val_car(i))))) |
| return val_car(i); |
| } |
| return NULL; |
| } |
| |
| cst_string *cst_implode(const cst_val *sl) |
| { |
| const cst_val *v; |
| int l=0; |
| char *s; |
| |
| for (v=sl; v; v=val_cdr(v)) |
| { |
| if (val_stringp(val_car(v))) |
| l += cst_strlen(val_string(val_car(v))); |
| } |
| |
| s = cst_alloc(cst_string,l+1); |
| |
| for (v=sl; v; v=val_cdr(v)) |
| { |
| if (val_stringp(val_car(v))) |
| cst_sprintf(s,"%s%s",s,val_string(val_car(v))); |
| |
| } |
| |
| return s; |
| } |
| |
| cst_val *val_readlist_string(const char *str) |
| { /* not fully general but a good start */ |
| cst_tokenstream *ts; |
| cst_val *v = NULL; |
| const char *p; |
| |
| ts = ts_open_string(str, |
| cst_ts_default_whitespacesymbols, |
| "", |
| "", |
| ""); |
| |
| while (!ts_eof(ts)) |
| { |
| p = ts_get(ts); |
| v = cons_val(string_val(p),v); |
| } |
| |
| ts_close(ts); |
| |
| return val_reverse(v); |
| } |
| |
| |
| |