| #include<stdlib.h> |
| #include<stdio.h> |
| #include<limits.h> |
| #include<string.h> |
| #include"compressed_rank.h" |
| #include"bitbool.h" |
| // #define DEBUG |
| #include"debug.h" |
| static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x) |
| { |
| register cmph_uint32 res = 0; |
| |
| while(x > 1) |
| { |
| x >>= 1; |
| res++; |
| } |
| return res; |
| }; |
| |
| void compressed_rank_init(compressed_rank_t * cr) |
| { |
| cr->max_val = 0; |
| cr->n = 0; |
| cr->rem_r = 0; |
| select_init(&cr->sel); |
| cr->vals_rems = 0; |
| } |
| |
| void compressed_rank_destroy(compressed_rank_t * cr) |
| { |
| free(cr->vals_rems); |
| cr->vals_rems = 0; |
| select_destroy(&cr->sel); |
| } |
| |
| void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n) |
| { |
| register cmph_uint32 i,j; |
| register cmph_uint32 rems_mask; |
| register cmph_uint32 * select_vec = 0; |
| cr->n = n; |
| cr->max_val = vals_table[cr->n - 1]; |
| cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n); |
| if(cr->rem_r == 0) |
| { |
| cr->rem_r = 1; |
| } |
| select_vec = (cmph_uint32 *) calloc(cr->max_val >> cr->rem_r, sizeof(cmph_uint32)); |
| cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32)); |
| rems_mask = (1U << cr->rem_r) - 1U; |
| |
| for(i = 0; i < cr->n; i++) |
| { |
| set_bits_value(cr->vals_rems, i, vals_table[i] & rems_mask, cr->rem_r, rems_mask); |
| } |
| |
| for(i = 1, j = 0; i <= cr->max_val >> cr->rem_r; i++) |
| { |
| while(i > (vals_table[j] >> cr->rem_r)) |
| { |
| j++; |
| } |
| select_vec[i - 1] = j; |
| }; |
| |
| |
| // FABIANO: before it was (cr->total_length >> cr->rem_r) + 1. But I wiped out the + 1 because |
| // I changed the select structure to work up to m, instead of up to m - 1. |
| select_generate(&cr->sel, select_vec, cr->max_val >> cr->rem_r, cr->n); |
| |
| free(select_vec); |
| } |
| |
| cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx) |
| { |
| register cmph_uint32 rems_mask; |
| register cmph_uint32 val_quot, val_rem; |
| register cmph_uint32 sel_res, rank; |
| |
| if(idx > cr->max_val) |
| { |
| return cr->n; |
| } |
| |
| val_quot = idx >> cr->rem_r; |
| rems_mask = (1U << cr->rem_r) - 1U; |
| val_rem = idx & rems_mask; |
| if(val_quot == 0) |
| { |
| rank = sel_res = 0; |
| } |
| else |
| { |
| sel_res = select_query(&cr->sel, val_quot - 1) + 1; |
| rank = sel_res - val_quot; |
| } |
| |
| do |
| { |
| if(GETBIT32(cr->sel.bits_vec, sel_res)) |
| { |
| break; |
| } |
| if(get_bits_value(cr->vals_rems, rank, cr->rem_r, rems_mask) >= val_rem) |
| { |
| break; |
| } |
| sel_res++; |
| rank++; |
| } while(1); |
| |
| return rank; |
| } |
| |
| cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr) |
| { |
| register cmph_uint32 space_usage = select_get_space_usage(&cr->sel); |
| space_usage += BITS_TABLE_SIZE(cr->n, cr->rem_r)*(cmph_uint32)sizeof(cmph_uint32)*8; |
| space_usage += 3*(cmph_uint32)sizeof(cmph_uint32)*8; |
| return space_usage; |
| } |
| |
| void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen) |
| { |
| register cmph_uint32 sel_size = select_packed_size(&(cr->sel)); |
| register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32); |
| register cmph_uint32 pos = 0; |
| char * buf_sel = 0; |
| cmph_uint32 buflen_sel = 0; |
| #ifdef DEBUG |
| cmph_uint32 i; |
| #endif |
| |
| *buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size; |
| |
| DEBUGP("sel_size = %u\n", sel_size); |
| DEBUGP("vals_rems_size = %u\n", vals_rems_size); |
| |
| *buf = (char *)calloc(*buflen, sizeof(char)); |
| |
| if (!*buf) |
| { |
| *buflen = UINT_MAX; |
| return; |
| } |
| |
| // dumping max_val, n and rem_r |
| memcpy(*buf, &(cr->max_val), sizeof(cmph_uint32)); |
| pos += (cmph_uint32)sizeof(cmph_uint32); |
| DEBUGP("max_val = %u\n", cr->max_val); |
| |
| memcpy(*buf + pos, &(cr->n), sizeof(cmph_uint32)); |
| pos += (cmph_uint32)sizeof(cmph_uint32); |
| DEBUGP("n = %u\n", cr->n); |
| |
| memcpy(*buf + pos, &(cr->rem_r), sizeof(cmph_uint32)); |
| pos += (cmph_uint32)sizeof(cmph_uint32); |
| DEBUGP("rem_r = %u\n", cr->rem_r); |
| |
| // dumping sel |
| select_dump(&cr->sel, &buf_sel, &buflen_sel); |
| memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32)); |
| pos += (cmph_uint32)sizeof(cmph_uint32); |
| DEBUGP("buflen_sel = %u\n", buflen_sel); |
| |
| memcpy(*buf + pos, buf_sel, buflen_sel); |
| |
| #ifdef DEBUG |
| i = 0; |
| for(i = 0; i < buflen_sel; i++) |
| { |
| DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i)); |
| } |
| #endif |
| pos += buflen_sel; |
| |
| free(buf_sel); |
| |
| // dumping vals_rems |
| memcpy(*buf + pos, cr->vals_rems, vals_rems_size); |
| #ifdef DEBUG |
| for(i = 0; i < vals_rems_size; i++) |
| { |
| DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i)); |
| } |
| #endif |
| pos += vals_rems_size; |
| |
| DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen); |
| } |
| |
| void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen) |
| { |
| register cmph_uint32 pos = 0; |
| cmph_uint32 buflen_sel = 0; |
| register cmph_uint32 vals_rems_size = 0; |
| #ifdef DEBUG |
| cmph_uint32 i; |
| #endif |
| |
| // loading max_val, n, and rem_r |
| memcpy(&(cr->max_val), buf, sizeof(cmph_uint32)); |
| pos += (cmph_uint32)sizeof(cmph_uint32); |
| DEBUGP("max_val = %u\n", cr->max_val); |
| |
| memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32)); |
| pos += (cmph_uint32)sizeof(cmph_uint32); |
| DEBUGP("n = %u\n", cr->n); |
| |
| memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32)); |
| pos += (cmph_uint32)sizeof(cmph_uint32); |
| DEBUGP("rem_r = %u\n", cr->rem_r); |
| |
| // loading sel |
| memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32)); |
| pos += (cmph_uint32)sizeof(cmph_uint32); |
| DEBUGP("buflen_sel = %u\n", buflen_sel); |
| |
| select_load(&cr->sel, buf + pos, buflen_sel); |
| #ifdef DEBUG |
| i = 0; |
| for(i = 0; i < buflen_sel; i++) |
| { |
| DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i)); |
| } |
| #endif |
| pos += buflen_sel; |
| |
| // loading vals_rems |
| if(cr->vals_rems) |
| { |
| free(cr->vals_rems); |
| } |
| vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r); |
| cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32)); |
| vals_rems_size *= 4; |
| memcpy(cr->vals_rems, buf + pos, vals_rems_size); |
| |
| #ifdef DEBUG |
| for(i = 0; i < vals_rems_size; i++) |
| { |
| DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i)); |
| } |
| #endif |
| pos += vals_rems_size; |
| |
| DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen); |
| } |
| |
| |
| |
| void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed) |
| { |
| if (cr && cr_packed) |
| { |
| char *buf = NULL; |
| cmph_uint32 buflen = 0; |
| compressed_rank_dump(cr, &buf, &buflen); |
| memcpy(cr_packed, buf, buflen); |
| free(buf); |
| } |
| } |
| |
| cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr) |
| { |
| register cmph_uint32 sel_size = select_packed_size(&cr->sel); |
| register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32); |
| return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size; |
| } |
| |
| cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx) |
| { |
| // unpacking cr_packed |
| register cmph_uint32 *ptr = (cmph_uint32 *)cr_packed; |
| register cmph_uint32 max_val = *ptr++; |
| register cmph_uint32 n = *ptr++; |
| register cmph_uint32 rem_r = *ptr++; |
| register cmph_uint32 buflen_sel = *ptr++; |
| register cmph_uint32 * sel_packed = ptr; |
| |
| register cmph_uint32 * bits_vec = sel_packed + 2; // skipping n and m |
| |
| register cmph_uint32 * vals_rems = (ptr += (buflen_sel >> 2)); |
| |
| // compressed sequence query computation |
| register cmph_uint32 rems_mask; |
| register cmph_uint32 val_quot, val_rem; |
| register cmph_uint32 sel_res, rank; |
| |
| if(idx > max_val) |
| { |
| return n; |
| } |
| |
| val_quot = idx >> rem_r; |
| rems_mask = (1U << rem_r) - 1U; |
| val_rem = idx & rems_mask; |
| if(val_quot == 0) |
| { |
| rank = sel_res = 0; |
| } |
| else |
| { |
| sel_res = select_query_packed(sel_packed, val_quot - 1) + 1; |
| rank = sel_res - val_quot; |
| } |
| |
| do |
| { |
| if(GETBIT32(bits_vec, sel_res)) |
| { |
| break; |
| } |
| if(get_bits_value(vals_rems, rank, rem_r, rems_mask) >= val_rem) |
| { |
| break; |
| } |
| sel_res++; |
| rank++; |
| } while(1); |
| |
| return rank; |
| } |
| |
| |
| |