440 lines
11 KiB
C
440 lines
11 KiB
C
#ifndef lint
|
|
static char *rcsid = "$Id: normalizer.c,v 1.1 2003/06/04 00:26:05 marka Exp $";
|
|
#endif
|
|
|
|
/*
|
|
* Copyright (c) 2000,2002 Japan Network Information Center.
|
|
* All rights reserved.
|
|
*
|
|
* By using this file, you agree to the terms and conditions set forth bellow.
|
|
*
|
|
* LICENSE TERMS AND CONDITIONS
|
|
*
|
|
* The following License Terms and Conditions apply, unless a different
|
|
* license is obtained from Japan Network Information Center ("JPNIC"),
|
|
* a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
|
|
* Chiyoda-ku, Tokyo 101-0047, Japan.
|
|
*
|
|
* 1. Use, Modification and Redistribution (including distribution of any
|
|
* modified or derived work) in source and/or binary forms is permitted
|
|
* under this License Terms and Conditions.
|
|
*
|
|
* 2. Redistribution of source code must retain the copyright notices as they
|
|
* appear in each source code file, this License Terms and Conditions.
|
|
*
|
|
* 3. Redistribution in binary form must reproduce the Copyright Notice,
|
|
* this License Terms and Conditions, in the documentation and/or other
|
|
* materials provided with the distribution. For the purposes of binary
|
|
* distribution the "Copyright Notice" refers to the following language:
|
|
* "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
|
|
*
|
|
* 4. The name of JPNIC may not be used to endorse or promote products
|
|
* derived from this Software without specific prior written approval of
|
|
* JPNIC.
|
|
*
|
|
* 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
|
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
|
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
#include <idn/assert.h>
|
|
#include <idn/logmacro.h>
|
|
#include <idn/result.h>
|
|
#include <idn/normalizer.h>
|
|
#include <idn/strhash.h>
|
|
#include <idn/unormalize.h>
|
|
#include <idn/unicode.h>
|
|
#include <idn/ucs4.h>
|
|
#include <idn/debug.h>
|
|
#include <idn/util.h>
|
|
|
|
#define MAX_LOCAL_SCHEME 3
|
|
|
|
#define INITIALIZED (scheme_hash != NULL)
|
|
|
|
typedef struct {
|
|
char *name;
|
|
idn_normalizer_proc_t proc;
|
|
} normalize_scheme_t;
|
|
|
|
struct idn_normalizer {
|
|
int nschemes;
|
|
int scheme_size;
|
|
normalize_scheme_t **schemes;
|
|
normalize_scheme_t *local_buf[MAX_LOCAL_SCHEME];
|
|
int reference_count;
|
|
};
|
|
|
|
static idn__strhash_t scheme_hash;
|
|
|
|
static idn__unicode_version_t vcur = NULL;
|
|
static idn__unicode_version_t v320 = NULL;
|
|
#define INIT_VERSION(version, var) \
|
|
if (var == NULL) { \
|
|
idn_result_t r = idn__unicode_create(version, &var); \
|
|
if (r != idn_success) \
|
|
return (r); \
|
|
}
|
|
|
|
static idn_result_t expand_schemes(idn_normalizer_t ctx);
|
|
static idn_result_t register_standard_normalizers(void);
|
|
static idn_result_t normalizer_formkc(const unsigned long *from,
|
|
unsigned long *to, size_t tolen);
|
|
static idn_result_t normalizer_formkc_v320(const unsigned long *from,
|
|
unsigned long *to,
|
|
size_t tolen);
|
|
|
|
static struct standard_normalizer {
|
|
char *name;
|
|
idn_normalizer_proc_t proc;
|
|
} standard_normalizer[] = {
|
|
{ "unicode-form-kc", normalizer_formkc },
|
|
{ "unicode-form-kc/3.2.0", normalizer_formkc_v320 },
|
|
{ "RFC3491", normalizer_formkc_v320 },
|
|
{ NULL, NULL },
|
|
};
|
|
|
|
idn_result_t
|
|
idn_normalizer_initialize(void) {
|
|
idn__strhash_t hash;
|
|
idn_result_t r;
|
|
|
|
TRACE(("idn_normalizer_initialize()\n"));
|
|
|
|
if (scheme_hash != NULL) {
|
|
r = idn_success; /* already initialized */
|
|
goto ret;
|
|
}
|
|
|
|
if ((r = idn__strhash_create(&hash)) != idn_success)
|
|
goto ret;
|
|
scheme_hash = hash;
|
|
|
|
/* Register standard normalizers */
|
|
r = register_standard_normalizers();
|
|
ret:
|
|
TRACE(("idn_normalizer_initialize(): %s\n", idn_result_tostring(r)));
|
|
return (r);
|
|
}
|
|
|
|
idn_result_t
|
|
idn_normalizer_create(idn_normalizer_t *ctxp) {
|
|
idn_normalizer_t ctx;
|
|
idn_result_t r;
|
|
|
|
assert(ctxp != NULL);
|
|
TRACE(("idn_normalizer_create()\n"));
|
|
|
|
if ((ctx = malloc(sizeof(struct idn_normalizer))) == NULL) {
|
|
r = idn_nomemory;
|
|
goto ret;
|
|
}
|
|
|
|
ctx->nschemes = 0;
|
|
ctx->scheme_size = MAX_LOCAL_SCHEME;
|
|
ctx->schemes = ctx->local_buf;
|
|
ctx->reference_count = 1;
|
|
*ctxp = ctx;
|
|
|
|
r = idn_success;
|
|
ret:
|
|
TRACE(("idn_normalizer_create(): %s\n", idn_result_tostring(r)));
|
|
return (r);
|
|
}
|
|
|
|
void
|
|
idn_normalizer_destroy(idn_normalizer_t ctx) {
|
|
assert(ctx != NULL);
|
|
|
|
TRACE(("idn_normalizer_destroy()\n"));
|
|
|
|
ctx->reference_count--;
|
|
if (ctx->reference_count <= 0) {
|
|
TRACE(("idn_normalizer_destroy(): the object is destroyed\n"));
|
|
if (ctx->schemes != ctx->local_buf)
|
|
free(ctx->schemes);
|
|
free(ctx);
|
|
} else {
|
|
TRACE(("idn_normalizer_destroy(): "
|
|
"update reference count (%d->%d)\n",
|
|
ctx->reference_count + 1, ctx->reference_count));
|
|
}
|
|
}
|
|
|
|
void
|
|
idn_normalizer_incrref(idn_normalizer_t ctx) {
|
|
assert(ctx != NULL);
|
|
|
|
TRACE(("idn_normalizer_incrref()\n"));
|
|
TRACE(("idn_normalizer_incrref: update reference count (%d->%d)\n",
|
|
ctx->reference_count, ctx->reference_count + 1));
|
|
|
|
ctx->reference_count++;
|
|
}
|
|
|
|
idn_result_t
|
|
idn_normalizer_add(idn_normalizer_t ctx, const char *scheme_name) {
|
|
idn_result_t r;
|
|
void *v;
|
|
normalize_scheme_t *scheme;
|
|
|
|
assert(ctx != NULL && scheme_name != NULL);
|
|
|
|
TRACE(("idn_normalizer_add(scheme_name=%s)\n", scheme_name));
|
|
|
|
assert(INITIALIZED);
|
|
|
|
if (idn__strhash_get(scheme_hash, scheme_name, &v) != idn_success) {
|
|
ERROR(("idn_normalizer_add(): invalid scheme \"%-.30s\"\n",
|
|
scheme_name));
|
|
r = idn_invalid_name;
|
|
goto ret;
|
|
}
|
|
|
|
scheme = v;
|
|
|
|
assert(ctx->nschemes <= ctx->scheme_size);
|
|
|
|
if (ctx->nschemes == ctx->scheme_size &&
|
|
(r = expand_schemes(ctx)) != idn_success) {
|
|
goto ret;
|
|
}
|
|
|
|
ctx->schemes[ctx->nschemes++] = scheme;
|
|
r = idn_success;
|
|
ret:
|
|
TRACE(("idn_normalizer_add(): %s\n", idn_result_tostring(r)));
|
|
return (r);
|
|
}
|
|
|
|
idn_result_t
|
|
idn_normalizer_addall(idn_normalizer_t ctx, const char **scheme_names,
|
|
int nschemes) {
|
|
idn_result_t r;
|
|
int i;
|
|
|
|
assert(ctx != NULL && scheme_names != NULL);
|
|
|
|
TRACE(("idn_normalizer_addall(nschemes=%d)\n", nschemes));
|
|
|
|
for (i = 0; i < nschemes; i++) {
|
|
r = idn_normalizer_add(ctx, (const char *)*scheme_names);
|
|
if (r != idn_success)
|
|
goto ret;
|
|
scheme_names++;
|
|
}
|
|
|
|
r = idn_success;
|
|
ret:
|
|
TRACE(("idn_normalizer_addall(): %s\n", idn_result_tostring(r)));
|
|
return (r);
|
|
}
|
|
|
|
idn_result_t
|
|
idn_normalizer_normalize(idn_normalizer_t ctx, const unsigned long *from,
|
|
unsigned long *to, size_t tolen) {
|
|
idn_result_t r;
|
|
unsigned long *src, *dst;
|
|
unsigned long *buffers[2] = {NULL, NULL};
|
|
size_t buflen[2] = {0, 0};
|
|
size_t dstlen;
|
|
int idx;
|
|
int i;
|
|
|
|
assert(scheme_hash != NULL);
|
|
assert(ctx != NULL && from != NULL && to != NULL);
|
|
|
|
TRACE(("idn_normalizer_normalize(from=\"%s\", tolen=%d)\n",
|
|
idn__debug_ucs4xstring(from, 50), (int)tolen));
|
|
|
|
if (ctx->nschemes <= 0) {
|
|
if (tolen < idn_ucs4_strlen(from) + 1) {
|
|
r = idn_buffer_overflow;
|
|
goto ret;
|
|
}
|
|
idn_ucs4_strcpy(to, from);
|
|
r = idn_success;
|
|
goto ret;
|
|
}
|
|
|
|
/*
|
|
* Normalize.
|
|
*/
|
|
src = (void *)from;
|
|
dstlen = idn_ucs4_strlen(from) + 1;
|
|
|
|
i = 0;
|
|
while (i < ctx->nschemes) {
|
|
TRACE(("idn_normalizer_normalize(): normalize %s\n",
|
|
ctx->schemes[i]->name));
|
|
|
|
/*
|
|
* Choose destination area to restore the result of a mapping.
|
|
*/
|
|
if (i + 1 == ctx->nschemes) {
|
|
dst = to;
|
|
dstlen = tolen;
|
|
} else {
|
|
if (src == buffers[0])
|
|
idx = 1;
|
|
else
|
|
idx = 0;
|
|
|
|
if (buflen[idx] < dstlen) {
|
|
void *newbuf;
|
|
|
|
newbuf = realloc(buffers[idx],
|
|
sizeof(long) * dstlen);
|
|
if (newbuf == NULL) {
|
|
r = idn_nomemory;
|
|
goto ret;
|
|
}
|
|
buffers[idx] = (unsigned long *)newbuf;
|
|
buflen[idx] = dstlen;
|
|
}
|
|
|
|
dst = buffers[idx];
|
|
dstlen = buflen[idx];
|
|
}
|
|
|
|
/*
|
|
* Perform i-th normalization scheme.
|
|
* If buffer size is not enough, we double it and try again.
|
|
*/
|
|
r = (ctx->schemes[i]->proc)(src, dst, dstlen);
|
|
if (r == idn_buffer_overflow && dst != to) {
|
|
dstlen *= 2;
|
|
continue;
|
|
}
|
|
if (r != idn_success)
|
|
goto ret;
|
|
|
|
src = dst;
|
|
i++;
|
|
}
|
|
|
|
r = idn_success;
|
|
ret:
|
|
free(buffers[0]);
|
|
free(buffers[1]);
|
|
if (r == idn_success) {
|
|
TRACE(("idn_normalizer_normalize(): success (to=\"%s\")\n",
|
|
idn__debug_ucs4xstring(to, 50)));
|
|
} else {
|
|
TRACE(("idn_normalizer_normalize(): %s\n",
|
|
idn_result_tostring(r)));
|
|
}
|
|
return (r);
|
|
}
|
|
|
|
idn_result_t
|
|
idn_normalizer_register(const char *scheme_name, idn_normalizer_proc_t proc) {
|
|
idn_result_t r;
|
|
normalize_scheme_t *scheme;
|
|
|
|
assert(scheme_name != NULL && proc != NULL);
|
|
|
|
TRACE(("idn_normalizer_register(scheme_name=%s)\n", scheme_name));
|
|
|
|
assert(INITIALIZED);
|
|
|
|
scheme = malloc(sizeof(*scheme) + strlen(scheme_name) + 1);
|
|
if (scheme == NULL) {
|
|
r = idn_nomemory;
|
|
goto ret;
|
|
}
|
|
scheme->name = (char *)(scheme + 1);
|
|
(void)strcpy(scheme->name, scheme_name);
|
|
scheme->proc = proc;
|
|
|
|
r = idn__strhash_put(scheme_hash, scheme_name, scheme);
|
|
if (r != idn_success)
|
|
goto ret;
|
|
|
|
r = idn_success;
|
|
ret:
|
|
TRACE(("idn_normalizer_register(): %s\n", idn_result_tostring(r)));
|
|
return (r);
|
|
}
|
|
|
|
static idn_result_t
|
|
expand_schemes(idn_normalizer_t ctx) {
|
|
normalize_scheme_t **new_schemes;
|
|
int new_size = ctx->scheme_size * 2;
|
|
|
|
if (ctx->schemes == ctx->local_buf) {
|
|
new_schemes = malloc(sizeof(normalize_scheme_t) * new_size);
|
|
} else {
|
|
new_schemes = realloc(ctx->schemes,
|
|
sizeof(normalize_scheme_t) * new_size);
|
|
}
|
|
if (new_schemes == NULL)
|
|
return (idn_nomemory);
|
|
|
|
if (ctx->schemes == ctx->local_buf)
|
|
memcpy(new_schemes, ctx->local_buf, sizeof(ctx->local_buf));
|
|
|
|
ctx->schemes = new_schemes;
|
|
ctx->scheme_size = new_size;
|
|
|
|
return (idn_success);
|
|
}
|
|
|
|
static idn_result_t
|
|
register_standard_normalizers(void) {
|
|
int i;
|
|
int failed = 0;
|
|
|
|
for (i = 0; standard_normalizer[i].name != NULL; i++) {
|
|
idn_result_t r;
|
|
r = idn_normalizer_register(standard_normalizer[i].name,
|
|
standard_normalizer[i].proc);
|
|
if (r != idn_success) {
|
|
WARNING(("idn_normalizer_initialize(): "
|
|
"failed to register \"%-.100s\"\n",
|
|
standard_normalizer[i].name));
|
|
failed++;
|
|
}
|
|
}
|
|
if (failed > 0)
|
|
return (idn_failure);
|
|
else
|
|
return (idn_success);
|
|
}
|
|
|
|
/*
|
|
* Unicode Normalization Forms -- latest version
|
|
*/
|
|
|
|
static idn_result_t
|
|
normalizer_formkc(const unsigned long *from, unsigned long *to, size_t tolen) {
|
|
INIT_VERSION(NULL, vcur);
|
|
return (idn__unormalize_formkc(vcur, from, to, tolen));
|
|
}
|
|
|
|
/*
|
|
* Unicode Normalization Forms -- version 3.2.0
|
|
*/
|
|
|
|
static idn_result_t
|
|
normalizer_formkc_v320(const unsigned long *from, unsigned long *to,
|
|
size_t tolen) {
|
|
INIT_VERSION("3.2.0", v320);
|
|
return (idn__unormalize_formkc(v320, from, to, tolen));
|
|
}
|