383 lines
10 KiB
C
383 lines
10 KiB
C
#ifndef lint
|
|
static char *rcsid = "$Id: ucsset.c,v 1.1 2001/06/09 00:30:27 tale Exp $";
|
|
#endif
|
|
|
|
/*
|
|
* Copyright (c) 2001 Japan Network Information Center. All rights reserved.
|
|
*
|
|
* By using this file, you agree to the terms and conditions set forth bellow.
|
|
*
|
|
* LICENSE TERMS AND CONDITIONS
|
|
*
|
|
* The following License Terms and Conditions apply, unless a different
|
|
* license is obtained from Japan Network Information Center ("JPNIC"),
|
|
* a Japanese association, Fuundo Bldg., 1-2 Kanda Ogawamachi, Chiyoda-ku,
|
|
* Tokyo, Japan.
|
|
*
|
|
* 1. Use, Modification and Redistribution (including distribution of any
|
|
* modified or derived work) in source and/or binary forms is permitted
|
|
* under this License Terms and Conditions.
|
|
*
|
|
* 2. Redistribution of source code must retain the copyright notices as they
|
|
* appear in each source code file, this License Terms and Conditions.
|
|
*
|
|
* 3. Redistribution in binary form must reproduce the Copyright Notice,
|
|
* this License Terms and Conditions, in the documentation and/or other
|
|
* materials provided with the distribution. For the purposes of binary
|
|
* distribution the "Copyright Notice" refers to the following language:
|
|
* "Copyright (c) Japan Network Information Center. All rights reserved."
|
|
*
|
|
* 4. Neither the name of JPNIC may be used to endorse or promote products
|
|
* derived from this Software without specific prior written approval of
|
|
* JPNIC.
|
|
*
|
|
* 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
|
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
|
*
|
|
* 6. Indemnification by Licensee
|
|
* Any person or entities using and/or redistributing this Software under
|
|
* this License Terms and Conditions shall defend indemnify and hold
|
|
* harmless JPNIC from and against any and all judgements damages,
|
|
* expenses, settlement liabilities, cost and other liabilities of any
|
|
* kind as a result of use and redistribution of this Software or any
|
|
* claim, suite, action, litigation or proceeding by any third party
|
|
* arising out of or relates to this License Terms and Conditions.
|
|
*
|
|
* 7. Governing Law, Jurisdiction and Venue
|
|
* This License Terms and Conditions shall be governed by and and
|
|
* construed in accordance with the law of Japan. Any person or entities
|
|
* using and/or redistributing this Software under this License Terms and
|
|
* Conditions hereby agrees and consent to the personal and exclusive
|
|
* jurisdiction and venue of Tokyo District Court of Japan.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include <mdn/result.h>
|
|
#include <mdn/assert.h>
|
|
#include <mdn/logmacro.h>
|
|
#include <mdn/ucsset.h>
|
|
|
|
#define UCS_MAX 0x80000000UL
|
|
|
|
#define INIT_SIZE 50
|
|
|
|
/*
|
|
* Code point range.
|
|
*
|
|
* The set of code points is represented by an array of code point ranges.
|
|
* In the building phase, specified ranges by 'mdn_ucsset_add' or
|
|
* 'mdn_ucsset_addrange' are simply appended to the array.
|
|
* And 'mdn_ucsset_fix' sorts the array by the code point value, and also
|
|
* merges any intersecting ranges. Since the array is sorted, a binary
|
|
* search can be used for looking up.
|
|
*/
|
|
typedef struct {
|
|
unsigned long from;
|
|
unsigned long to;
|
|
} range_t;
|
|
|
|
/*
|
|
* Code point segment.
|
|
*
|
|
* To speed up searching further, the entire region of UCS-4 code points
|
|
* (U+0000 - U+7FFFFFFF) are divided into segments. For each segment,
|
|
* the first and last element of the range array corresponding to the
|
|
* segment are computed by 'mdn_ucsset_fix'. This narrows down the
|
|
* (initial) search range.
|
|
*/
|
|
typedef struct {
|
|
int range_start; /* index of ucsset.ranges */
|
|
int range_end; /* ditto */
|
|
} segment_t;
|
|
|
|
/*
|
|
* Code point to segment index conversion.
|
|
*
|
|
* Below is the function that maps a code point to the corresponding segment.
|
|
* The mapping is non-uniform, so that BMP, the following 16 planes that
|
|
* comprise Unicode code points together with BMP, and other planes
|
|
* have different granularity.
|
|
*/
|
|
#define SEG_THLD1 0x10000 /* BMP */
|
|
#define SEG_THLD2 0x110000 /* Unicode (BMP+16planes) */
|
|
#define SEG_SFT1 10 /* BMP: 1K code points/segment */
|
|
#define SEG_SFT2 14 /* following 16 planes: 16K cp/seg */
|
|
#define SEG_SFT3 24 /* rest: 16M cp/seg */
|
|
#define SEG_OFF1 (SEG_THLD1 >> SEG_SFT1)
|
|
#define SEG_OFF2 (((SEG_THLD2 - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1)
|
|
#define SEG_INDEX(v) \
|
|
(((v) < SEG_THLD1) ? ((v) >> SEG_SFT1) : \
|
|
((v) < SEG_THLD2) ? ((((v) - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1) : \
|
|
((((v) - SEG_THLD2) >> SEG_SFT3) + SEG_OFF2))
|
|
#define SEG_LEN (SEG_INDEX(UCS_MAX - 1) + 1)
|
|
|
|
/*
|
|
* Representation of set of UCS code points.
|
|
*/
|
|
typedef struct mdn_ucsset {
|
|
segment_t segments[SEG_LEN];
|
|
int fixed;
|
|
int size; /* allocated size of 'ranges' */
|
|
int nranges; /* num of ranges */
|
|
range_t *ranges;
|
|
int refcnt; /* reference count */
|
|
} ucsset;
|
|
|
|
static mdn_result_t addrange(mdn_ucsset_t ctx, unsigned long from,
|
|
unsigned long to, char *func_name);
|
|
static int comp_range(const void *v1, const void *v2);
|
|
|
|
mdn_result_t
|
|
mdn_ucsset_create(mdn_ucsset_t *ctx) {
|
|
mdn_ucsset_t bm;
|
|
|
|
assert(ctx != NULL);
|
|
|
|
TRACE(("mdn_ucsset_create()\n"));
|
|
|
|
if ((bm = malloc(sizeof(ucsset))) == NULL) {
|
|
WARNING(("mdn_ucsset_create: malloc failed\n"));
|
|
return mdn_nomemory;
|
|
}
|
|
bm->size = bm->nranges = 0;
|
|
bm->ranges = NULL;
|
|
bm->fixed = 0;
|
|
bm->refcnt = 1;
|
|
*ctx = bm;
|
|
return (mdn_success);
|
|
}
|
|
|
|
void
|
|
mdn_ucsset_destroy(mdn_ucsset_t ctx) {
|
|
assert(ctx != NULL && ctx->refcnt > 0);
|
|
|
|
TRACE(("mdn_ucsset_destroy()\n"));
|
|
|
|
if (--ctx->refcnt == 0) {
|
|
if (ctx->ranges != NULL)
|
|
free(ctx->ranges);
|
|
free(ctx);
|
|
}
|
|
}
|
|
|
|
void
|
|
mdn_ucsset_incrref(mdn_ucsset_t ctx) {
|
|
assert(ctx != NULL && ctx->refcnt > 0);
|
|
|
|
TRACE(("mdn_ucsset_incrref()\n"));
|
|
|
|
ctx->refcnt++;
|
|
}
|
|
|
|
mdn_result_t
|
|
mdn_ucsset_add(mdn_ucsset_t ctx, unsigned long v) {
|
|
assert(ctx != NULL && ctx->refcnt > 0);
|
|
|
|
TRACE(("mdn_ucsset_add(v=U+%lX)\n", v));
|
|
|
|
return (addrange(ctx, v, v, "mdn_ucsset_add"));
|
|
}
|
|
|
|
mdn_result_t
|
|
mdn_ucsset_addrange(mdn_ucsset_t ctx, unsigned long from,
|
|
unsigned long to)
|
|
{
|
|
assert(ctx != NULL && ctx->refcnt > 0);
|
|
|
|
TRACE(("mdn_ucsset_addrange(from=U+%lX, to=U+%lX)\n",
|
|
from, to));
|
|
|
|
return (addrange(ctx, from, to, "mdn_ucsset_addrange"));
|
|
}
|
|
|
|
void
|
|
mdn_ucsset_fix(mdn_ucsset_t ctx) {
|
|
int nranges;
|
|
range_t *ranges;
|
|
segment_t *segments;
|
|
int i, j;
|
|
|
|
assert(ctx != NULL && ctx->refcnt > 0);
|
|
|
|
TRACE(("mdn_ucsset_fix()\n"));
|
|
|
|
nranges = ctx->nranges;
|
|
ranges = ctx->ranges;
|
|
segments = ctx->segments;
|
|
|
|
if (ctx->fixed)
|
|
return;
|
|
|
|
ctx->fixed = 1;
|
|
|
|
/* Initialize segment array */
|
|
for (i = 0; i < SEG_LEN; i++) {
|
|
segments[i].range_start = -1;
|
|
segments[i].range_end = -1;
|
|
}
|
|
|
|
/* If the set is empty, there's nothing to be done. */
|
|
if (nranges == 0)
|
|
return;
|
|
|
|
/* Sort ranges. */
|
|
qsort(ranges, nranges, sizeof(range_t), comp_range);
|
|
|
|
/* Merge overlapped/continuous ranges. */
|
|
for (i = 0, j = 1; j < nranges; j++) {
|
|
if (ranges[i].to + 1 >= ranges[j].from) {
|
|
/* can be merged */
|
|
if (ranges[i].to < ranges[j].to) {
|
|
ranges[i].to = ranges[j].to;
|
|
}
|
|
} else {
|
|
i++;
|
|
if (i < j)
|
|
ranges[i] = ranges[j];
|
|
}
|
|
}
|
|
/* 'i' points the last range in the array. */
|
|
ctx->nranges = nranges = ++i;
|
|
|
|
/* Create segment array. */
|
|
for (i = 0; i < nranges; i++) {
|
|
int fidx = SEG_INDEX(ranges[i].from);
|
|
int tidx = SEG_INDEX(ranges[i].to);
|
|
|
|
for (j = fidx; j <= tidx; j++) {
|
|
if (segments[j].range_start < 0)
|
|
segments[j].range_start = i;
|
|
segments[j].range_end = i;
|
|
}
|
|
}
|
|
|
|
#if 0
|
|
/*
|
|
* Does the standard guarantee realloc() always succeeds
|
|
* when shrinking?
|
|
*/
|
|
/* Shrink malloc'ed space if possible. */
|
|
ctx->ranges = realloc(ctx->ranges, ctx->nranges * sizeof(range_t));
|
|
#endif
|
|
}
|
|
|
|
mdn_result_t
|
|
mdn_ucsset_lookup(mdn_ucsset_t ctx, unsigned long v, int *found) {
|
|
int idx;
|
|
segment_t *segments;
|
|
|
|
assert(ctx != NULL && ctx->refcnt > 0 && found != NULL);
|
|
|
|
TRACE(("mdn_ucsset_lookup(v=U+%lX)\n", v));
|
|
|
|
/* Make sure it is fixed. */
|
|
if (!ctx->fixed) {
|
|
WARNING(("mdn_ucsset_lookup: not fixed yet\n"));
|
|
return (mdn_failure);
|
|
}
|
|
|
|
/* Check the given code point. */
|
|
if (v >= UCS_MAX)
|
|
return (mdn_invalid_codepoint);
|
|
|
|
/* Get the segment 'v' belongs to. */
|
|
segments = ctx->segments;
|
|
idx = SEG_INDEX(v);
|
|
|
|
/* Do binary search. */
|
|
*found = 0;
|
|
if (segments[idx].range_start >= 0) {
|
|
int lo = segments[idx].range_start;
|
|
int hi = segments[idx].range_end;
|
|
range_t *ranges = ctx->ranges;
|
|
|
|
while (lo <= hi) {
|
|
int mid = (lo + hi) / 2;
|
|
if (v < ranges[mid].from) {
|
|
hi = mid - 1;
|
|
} else if (v > ranges[mid].to) {
|
|
lo = mid + 1;
|
|
} else {
|
|
*found = 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return (mdn_success);
|
|
}
|
|
|
|
static mdn_result_t
|
|
addrange(mdn_ucsset_t ctx, unsigned long from, unsigned long to,
|
|
char *func_name)
|
|
{
|
|
/* Check the given code points. */
|
|
if (from > UCS_MAX) {
|
|
WARNING(("%s: code point out of range (U+%lX)\n",
|
|
func_name, from));
|
|
return (mdn_invalid_codepoint);
|
|
} else if (to > UCS_MAX) {
|
|
WARNING(("%s: code point out of range (U+%lX)\n",
|
|
func_name, to));
|
|
return (mdn_invalid_codepoint);
|
|
} else if (from > to) {
|
|
WARNING(("%s: invalid range spec (U+%lX-U+%lX)\n",
|
|
func_name, from, to));
|
|
return (mdn_invalid_codepoint);
|
|
}
|
|
|
|
/* Make sure it is not fixed yet. */
|
|
if (ctx->fixed) {
|
|
WARNING(("%s: attempt to add to already fixed object\n",
|
|
func_name));
|
|
return (mdn_failure);
|
|
}
|
|
|
|
/* Append the specified range to the 'ranges' array. */
|
|
if (ctx->nranges >= ctx->size) {
|
|
/* Make it bigger. */
|
|
if (ctx->size == 0)
|
|
ctx->size = INIT_SIZE;
|
|
else
|
|
ctx->size *= 2;
|
|
ctx->ranges = realloc(ctx->ranges,
|
|
ctx->size * sizeof(range_t));
|
|
if (ctx->ranges == NULL)
|
|
return (mdn_nomemory);
|
|
}
|
|
ctx->ranges[ctx->nranges].from = from;
|
|
ctx->ranges[ctx->nranges].to = to;
|
|
ctx->nranges++;
|
|
|
|
return (mdn_success);
|
|
}
|
|
|
|
static int
|
|
comp_range(const void *v1, const void *v2) {
|
|
/*
|
|
* Range comparation function suitable for qsort().
|
|
*/
|
|
const range_t *r1 = v1;
|
|
const range_t *r2 = v2;
|
|
|
|
if (r1->from < r2->from)
|
|
return (-1);
|
|
else if (r1->from > r2->from)
|
|
return (1);
|
|
else
|
|
return (0);
|
|
}
|