MDEV-27009 Add UCA-14.0.0 collations - adding uca-dump into build targets
- Adding uca-dump into build targets - Adding ctype-uca.h and moving implicit weight related routines there - Reusing implicit weight routines in ctype-uca.c and uca-dump.c - Adding handling of command line arguments to uca-dump - Fixing some compile-time warnings in uca-dump.c
This commit is contained in:
parent
45e0373a78
commit
bb84f61a26
1
debian/not-installed
vendored
1
debian/not-installed
vendored
@ -24,6 +24,7 @@ usr/lib/x86_64-linux-gnu/libidbboot.a # ColumnStore header file
|
|||||||
usr/lib/x86_64-linux-gnu/libprocessor.a # ColumnStore header file
|
usr/lib/x86_64-linux-gnu/libprocessor.a # ColumnStore header file
|
||||||
usr/lib/x86_64-linux-gnu/libwe_xml.a # ColumnStore header file
|
usr/lib/x86_64-linux-gnu/libwe_xml.a # ColumnStore header file
|
||||||
usr/bin/test-connect-t
|
usr/bin/test-connect-t
|
||||||
|
usr/bin/uca-dump
|
||||||
usr/bin/wsrep_sst_backup
|
usr/bin/wsrep_sst_backup
|
||||||
usr/lib/mysql/plugin/type_test.so
|
usr/lib/mysql/plugin/type_test.so
|
||||||
usr/lib/sysusers.d/mariadb.conf # Not used (yet) in Debian systemd
|
usr/lib/sysusers.d/mariadb.conf # Not used (yet) in Debian systemd
|
||||||
|
@ -37,3 +37,5 @@ MAYBE_DISABLE_IPO(strings)
|
|||||||
ADD_EXECUTABLE(conf_to_src EXCLUDE_FROM_ALL conf_to_src.c)
|
ADD_EXECUTABLE(conf_to_src EXCLUDE_FROM_ALL conf_to_src.c)
|
||||||
SET_TARGET_PROPERTIES(conf_to_src PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE)
|
SET_TARGET_PROPERTIES(conf_to_src PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE)
|
||||||
TARGET_LINK_LIBRARIES(conf_to_src mysys strings)
|
TARGET_LINK_LIBRARIES(conf_to_src mysys strings)
|
||||||
|
|
||||||
|
ADD_EXECUTABLE(uca-dump uca-dump.c)
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
#include "strings_def.h"
|
#include "strings_def.h"
|
||||||
#include <m_ctype.h>
|
#include <m_ctype.h>
|
||||||
|
#include "ctype-uca.h"
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
@ -31689,62 +31690,13 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t currwc,
|
|||||||
|
|
||||||
/****************************************************************/
|
/****************************************************************/
|
||||||
|
|
||||||
/**
|
|
||||||
Implicit weights for a code CP are constructed as follows:
|
|
||||||
[.AAAA.0020.0002][.BBBB.0000.0000]
|
|
||||||
|
|
||||||
where:
|
|
||||||
AAAA= BASE + (CP >> 15);
|
|
||||||
BBBB= (CP & 0x7FFF) | 0x8000;
|
|
||||||
|
|
||||||
There are two weights in the primary level (AAAA followed by BBBB).
|
|
||||||
There is one weight on other levels:
|
|
||||||
- 0020 on the secondary level
|
|
||||||
- 0002 on the tertiary level
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
Return BASE for an implicit weight on the primary level
|
|
||||||
|
|
||||||
According to UCA, BASE is calculated as follows:
|
|
||||||
- FB40 for Unified_Ideograph=True AND
|
|
||||||
((Block=CJK_Unified_Ideograph) OR
|
|
||||||
(Block=CJK_Compatibility_Ideographs))
|
|
||||||
- FB80 for Unified_Ideograph=True AND NOT
|
|
||||||
((Block=CJK_Unified_Ideograph) OR
|
|
||||||
(Block=CJK_Compatibility_Ideographs))
|
|
||||||
- FBC0 for any other code point
|
|
||||||
TODO: it seems we're not handling BASE correctly:
|
|
||||||
- check what are those blocks
|
|
||||||
- there are more Unified Ideograph blocks in the latest Unicode versions
|
|
||||||
*/
|
|
||||||
static inline uint16
|
|
||||||
my_uca_implicit_weight_base(my_wc_t code)
|
|
||||||
{
|
|
||||||
if (code >= 0x3400 && code <= 0x4DB5)
|
|
||||||
return 0xFB80;
|
|
||||||
if (code >= 0x4E00 && code <= 0x9FA5)
|
|
||||||
return 0xFB40;
|
|
||||||
return 0xFBC0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
my_uca_implicit_weight_put(uint16 *to, my_wc_t code, uint level)
|
my_uca_implicit_weight_put(uint16 *to, my_wc_t code, uint level)
|
||||||
{
|
{
|
||||||
switch (level) {
|
MY_UCA_IMPLICIT_WEIGHT weight;
|
||||||
case 1: to[0]= 0x0020; to[1]= 0; break; /* Secondary level */
|
weight= my_uca_520_implicit_weight_on_level(code, level);
|
||||||
case 2: to[0]= 0x0002; to[1]= 0; break; /* Tertiary level */
|
to[0]= weight.weight[0];
|
||||||
case 3: to[0]= 0x0001; to[1]= 0; break; /* Quaternary level */
|
to[1]= weight.weight[1];
|
||||||
default:
|
|
||||||
DBUG_ASSERT(0);
|
|
||||||
case 0:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* Primary level */
|
|
||||||
to[0]= (uint16)(code >> 15) + my_uca_implicit_weight_base(code);
|
|
||||||
to[1]= (code & 0x7FFF) | 0x8000;
|
|
||||||
to[2]= 0;
|
to[2]= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -31766,10 +31718,11 @@ static inline int
|
|||||||
my_uca_scanner_next_implicit_primary(my_uca_scanner *scanner)
|
my_uca_scanner_next_implicit_primary(my_uca_scanner *scanner)
|
||||||
{
|
{
|
||||||
my_wc_t wc= (scanner->page << 8) + scanner->code;
|
my_wc_t wc= (scanner->page << 8) + scanner->code;
|
||||||
scanner->implicit[0]= (wc & 0x7FFF) | 0x8000; /* The second weight */
|
MY_UCA_IMPLICIT_WEIGHT weight= my_uca_520_implicit_weight_primary(wc);
|
||||||
scanner->implicit[1]= 0; /* 0 terminator */
|
scanner->implicit[0]= weight.weight[1]; /* The second weight */
|
||||||
|
scanner->implicit[1]= 0; /* 0 terminator */
|
||||||
scanner->wbeg= scanner->implicit;
|
scanner->wbeg= scanner->implicit;
|
||||||
return my_uca_implicit_weight_base(wc) + (wc >> 15);
|
return weight.weight[0]; /* The first weight */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
155
strings/ctype-uca.h
Normal file
155
strings/ctype-uca.h
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
#ifndef CTYPE_UCA_H
|
||||||
|
#define CTYPE_UCA_H
|
||||||
|
/* Copyright (c) 2021, MariaDB
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Library General Public
|
||||||
|
License as published by the Free Software Foundation; version 2
|
||||||
|
of the License.
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Library General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Library General Public
|
||||||
|
License along with this library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
|
||||||
|
MA 02110-1335 USA */
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Implicit weight handling is done according to
|
||||||
|
the section "Computing Implicit Weights" in
|
||||||
|
https://unicode.org/reports/tr10/#Values_For_Base_Table
|
||||||
|
(as of Unicode 14.0.0)
|
||||||
|
|
||||||
|
Implicit weights for a code CP are constructed as follows:
|
||||||
|
[.AAAA.0020.0002][.BBBB.0000.0000]
|
||||||
|
|
||||||
|
- There are two primary weights, depending on the character type and block.
|
||||||
|
- There is one weight on the secondary and tertiary levels.
|
||||||
|
|
||||||
|
AAAA and BBBB are computed using different formulas for:
|
||||||
|
- Siniform ideographic scripts
|
||||||
|
- Han
|
||||||
|
- Unassigned characters
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef struct my_uca_implict_weight_t
|
||||||
|
{
|
||||||
|
uint16 weight[2];
|
||||||
|
} MY_UCA_IMPLICIT_WEIGHT;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
By default, implicit weights for a code CP are constructed as follows:
|
||||||
|
[.AAAA.0020.0002][.BBBB.0000.0000]
|
||||||
|
|
||||||
|
where AAAA and BBBB are :
|
||||||
|
AAAA= BASE + (CP >> 15);
|
||||||
|
BBBB= (CP & 0x7FFF) | 0x8000;
|
||||||
|
|
||||||
|
This formula covers the following implicit weight subtypes:
|
||||||
|
- Core Han Unified Ideographs
|
||||||
|
- All other Han Unified Ideographs
|
||||||
|
- Unassigned characters
|
||||||
|
Every mentioned subtype passes a different BASE.
|
||||||
|
|
||||||
|
This formula does not cover Siniform ideographic scripts.
|
||||||
|
They are handled by separate functions.
|
||||||
|
*/
|
||||||
|
static inline MY_UCA_IMPLICIT_WEIGHT
|
||||||
|
my_uca_implicit_weight_primary_default(uint16 base, my_wc_t code)
|
||||||
|
{
|
||||||
|
MY_UCA_IMPLICIT_WEIGHT res;
|
||||||
|
res.weight[0]= (uint16) ((code >> 15) + base);
|
||||||
|
res.weight[1]= (uint16) ((code & 0x7FFF)|0x8000);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
Calculate Unicode-5.2.0 implicit weight on the primary level.
|
||||||
|
|
||||||
|
According to UCA, BASE is calculated as follows:
|
||||||
|
- FB40 for Unified_Ideograph=True AND
|
||||||
|
((Block=CJK_Unified_Ideograph) OR
|
||||||
|
(Block=CJK_Compatibility_Ideographs))
|
||||||
|
- FB80 for Unified_Ideograph=True AND NOT
|
||||||
|
((Block=CJK_Unified_Ideograph) OR
|
||||||
|
(Block=CJK_Compatibility_Ideographs))
|
||||||
|
- FBC0 for any other code point
|
||||||
|
|
||||||
|
But for Unicode-5.2.0 and Unicode-4.0.0 we used
|
||||||
|
a simplified formula as implemented before.
|
||||||
|
*/
|
||||||
|
static inline MY_UCA_IMPLICIT_WEIGHT
|
||||||
|
my_uca_520_implicit_weight_primary(my_wc_t code)
|
||||||
|
{
|
||||||
|
uint16 base;
|
||||||
|
/*
|
||||||
|
3400;<CJK Ideograph Extension A, First>
|
||||||
|
4DB5;<CJK Ideograph Extension A, Last>
|
||||||
|
4E00;<CJK Ideograph, First>
|
||||||
|
9FA5;<CJK Ideograph, Last>
|
||||||
|
*/
|
||||||
|
if (code >= 0x3400 && code <= 0x4DB5)
|
||||||
|
base= 0xFB80;
|
||||||
|
else if (code >= 0x4E00 && code <= 0x9FA5)
|
||||||
|
base= 0xFB40;
|
||||||
|
else
|
||||||
|
base= 0xFBC0;
|
||||||
|
|
||||||
|
return my_uca_implicit_weight_primary_default(base, code);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline MY_UCA_IMPLICIT_WEIGHT
|
||||||
|
my_uca_implicit_weight_secondary()
|
||||||
|
{
|
||||||
|
MY_UCA_IMPLICIT_WEIGHT res;
|
||||||
|
res.weight[0]= 0x0020;
|
||||||
|
res.weight[1]= 0;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline MY_UCA_IMPLICIT_WEIGHT
|
||||||
|
my_uca_implicit_weight_tertiary()
|
||||||
|
{
|
||||||
|
MY_UCA_IMPLICIT_WEIGHT res;
|
||||||
|
res.weight[0]= 0x0002;
|
||||||
|
res.weight[1]= 0;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline MY_UCA_IMPLICIT_WEIGHT
|
||||||
|
my_uca_implicit_weight_quaternary()
|
||||||
|
{
|
||||||
|
MY_UCA_IMPLICIT_WEIGHT res;
|
||||||
|
res.weight[0]= 0x0001;
|
||||||
|
res.weight[1]= 0;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline MY_UCA_IMPLICIT_WEIGHT
|
||||||
|
my_uca_520_implicit_weight_on_level(my_wc_t code, uint level)
|
||||||
|
{
|
||||||
|
switch (level) {
|
||||||
|
case 0:
|
||||||
|
return my_uca_520_implicit_weight_primary(code);
|
||||||
|
case 1:
|
||||||
|
return my_uca_implicit_weight_secondary();
|
||||||
|
case 2:
|
||||||
|
return my_uca_implicit_weight_tertiary();
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return my_uca_implicit_weight_quaternary();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* CTYPE_UCA_H */
|
@ -20,13 +20,14 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
typedef unsigned char uchar;
|
#include "my_global.h"
|
||||||
typedef unsigned short uint16;
|
#include "m_ctype.h"
|
||||||
|
#include "ctype-uca.h"
|
||||||
|
|
||||||
struct uca_item_st
|
struct uca_item_st
|
||||||
{
|
{
|
||||||
uchar num;
|
uchar num;
|
||||||
uint16 weight[4][9];
|
uint16 weight[4][MY_UCA_MAX_WEIGHT_SIZE];
|
||||||
};
|
};
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@ -43,35 +44,139 @@ struct uca_item_st
|
|||||||
|
|
||||||
#define MAX_ALLOWED_CODE 0x10FFFF
|
#define MAX_ALLOWED_CODE 0x10FFFF
|
||||||
|
|
||||||
/* Name that goes into all array names */
|
|
||||||
static const char *global_name_prefix= "uca520";
|
typedef struct opt_st
|
||||||
|
{
|
||||||
|
const char *name_prefix; /* Name that goes into all array names */
|
||||||
|
const char *filename; /* The filename or "-" for stdin */
|
||||||
|
uint levels; /* The number of levels to dump */
|
||||||
|
} OPT;
|
||||||
|
|
||||||
|
|
||||||
|
static OPT defaults=
|
||||||
|
{
|
||||||
|
"uca",
|
||||||
|
"-",
|
||||||
|
3
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/* Name prefix that goes into page weight array names after global_name_prefix */
|
/* Name prefix that goes into page weight array names after options.name_prefix */
|
||||||
static char *pname_prefix[]= {"_p", "_p", "_p"};
|
static const char *pname_prefix[]= {"_p", "_p", "_p"};
|
||||||
|
|
||||||
/* Name suffix that goes into page weight array names after page number */
|
/* Name suffix that goes into page weight array names after page number */
|
||||||
static char *pname_suffix[]= {"", "_w2", "_w3"};
|
static const char *pname_suffix[]= {"", "_w2", "_w3"};
|
||||||
|
|
||||||
|
|
||||||
|
/**
  Print a help message to stdout.

  The message lists the supported options, because "[options]"
  alone does not tell the user what can be passed.

  @param prog  The program name to display (callers pass av[0])
*/
void usage(const char *prog)
{
  printf("Usage:\n");
  printf("%s [options] filename\n", prog);
  printf("\n");
  printf("  filename            The file to read, or - for stdin\n");
  printf("\n");
  printf("Options:\n");
  printf("  --name-prefix=STR   Name that goes into all array names\n");
  printf("  --levels=N          The number of levels to dump (1..3)\n");
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline int lstrncmp(const char *str, const LEX_CSTRING lstr)
|
||||||
|
{
|
||||||
|
return strncmp(lstr.str, str, lstr.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
  Parse one command line option of the form --name=value.

  Supported options:
  - --name-prefix=STR  sets options->name_prefix to STR.
                       STR is not copied, it points into "opt".
  - --levels=N         sets options->levels. N must be 1, 2 or 3.

  On error "options" is left unchanged.

  @param options  The settings to update
  @param opt      The option text (the caller passes arguments
                  starting with "--")
  @return         0 on success, 1 on error.
                  An error message is printed to stdout.
*/
int process_option(OPT *options, const char *opt)
{
  static const LEX_CSTRING opt_name_prefix= {STRING_WITH_LEN("--name-prefix=")};
  static const LEX_CSTRING opt_levels= {STRING_WITH_LEN("--levels=")};
  if (!lstrncmp(opt, opt_name_prefix))
  {
    options->name_prefix= opt + opt_name_prefix.length;
    return 0;
  }
  if (!lstrncmp(opt, opt_levels))
  {
    const char *value= opt + opt_levels.length;
    char *end;
    unsigned long levels= strtoul(value, &end, 10);
    /*
      Check the range on the full unsigned long value:
      narrowing to uint first could wrap a huge number into 1..3.
      Also reject an empty value and trailing garbage, e.g. --levels=2x
    */
    if (end == value || *end != '\0' || levels < 1 || levels > 3)
    {
      printf("Bad --levels value\n");
      return 1;
    }
    options->levels= (uint) levels;
    return 0;
  }
  printf("Unknown option: %s\n", opt);
  return 1;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
  Parse the command line: zero or more "--" options
  followed by exactly one file name.

  The file name must be the last argument. If it is missing,
  or if anything follows it, the usage text is printed.

  @param options  The settings to update
  @param ac       The number of arguments, as passed to main()
  @param av       The arguments, as passed to main()
  @return         0 on success, 1 on error
*/
int process_options(OPT *options, int ac, char **av)
{
  int pos= 1;

  /* Consume the leading "--" options */
  while (pos < ac && strncmp(av[pos], "--", 2) == 0)
  {
    if (process_option(options, av[pos]) != 0)
      return 1;
    pos++;
  }

  /* Exactly one argument must remain, it is the file name */
  if (pos + 1 == ac)
  {
    options->filename= av[pos];
    return 0;
  }

  usage(av[0]);
  return 1;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
  Open the input for reading.

  @param name  A file name, or "-" to read from stdin
  @return      stdin for "-", otherwise the result of
               fopen(name, "r"), which is NULL on failure
*/
FILE *open_file(const char *name)
{
  int want_stdin= (strcmp(name, "-") == 0);
  return want_stdin ? stdin : fopen(name, "r");
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
  Close a stream returned by open_file().
  stdin is left open, any other stream is passed to fclose().

  @param file  The stream to close
*/
void close_file(FILE *file)
{
  if (file == stdin)
    return;
  fclose(file);
}
|
||||||
|
|
||||||
|
|
||||||
int main(int ac, char **av)
|
int main(int ac, char **av)
|
||||||
{
|
{
|
||||||
char str[256];
|
char str[1024];
|
||||||
char *weights[64];
|
char *weights[64];
|
||||||
static struct uca_item_st uca[MAX_ALLOWED_CODE+1];
|
static struct uca_item_st uca[MAX_ALLOWED_CODE+1];
|
||||||
size_t code, w;
|
my_wc_t code;
|
||||||
|
uint w;
|
||||||
int pageloaded[MY_UCA_NPAGES];
|
int pageloaded[MY_UCA_NPAGES];
|
||||||
|
FILE *file;
|
||||||
|
OPT options= defaults;
|
||||||
|
|
||||||
|
if (process_options(&options, ac, av))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!(file= open_file(options.filename)))
|
||||||
|
{
|
||||||
|
printf("Could not open %s for reading\n", options.filename);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
bzero(uca, sizeof(uca));
|
bzero(uca, sizeof(uca));
|
||||||
bzero(pageloaded, sizeof(pageloaded));
|
bzero(pageloaded, sizeof(pageloaded));
|
||||||
|
|
||||||
while (fgets(str,sizeof(str),stdin))
|
while (fgets(str, sizeof(str), file))
|
||||||
{
|
{
|
||||||
char *comment;
|
char *comment;
|
||||||
char *weight;
|
char *weight;
|
||||||
char *s;
|
char *s;
|
||||||
size_t codenum;
|
size_t codenum;
|
||||||
|
|
||||||
code= strtol(str,NULL,16);
|
code= (my_wc_t) strtol(str,NULL,16);
|
||||||
|
|
||||||
if (str[0]=='#' || (code > MAX_ALLOWED_CODE))
|
if (str[0]=='#' || (code > MAX_ALLOWED_CODE))
|
||||||
continue;
|
continue;
|
||||||
@ -116,7 +221,9 @@ int main(int ac, char **av)
|
|||||||
uca[code].num++;
|
uca[code].num++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (w=0; w < uca[code].num; w++)
|
set_if_smaller(uca[code].num, MY_UCA_MAX_WEIGHT_SIZE-1);
|
||||||
|
|
||||||
|
for (w=0; w < uca[code].num ; w++)
|
||||||
{
|
{
|
||||||
size_t partnum;
|
size_t partnum;
|
||||||
|
|
||||||
@ -125,9 +232,8 @@ int main(int ac, char **av)
|
|||||||
while (*s)
|
while (*s)
|
||||||
{
|
{
|
||||||
char *endptr;
|
char *endptr;
|
||||||
size_t part;
|
uint part= (uint) strtoul(s + 1, &endptr, 16);
|
||||||
part= strtol(s+1,&endptr,16);
|
uca[code].weight[partnum][w]= (uint16) part;
|
||||||
uca[code].weight[partnum][w]= part;
|
|
||||||
s= endptr;
|
s= endptr;
|
||||||
partnum++;
|
partnum++;
|
||||||
}
|
}
|
||||||
@ -135,45 +241,24 @@ int main(int ac, char **av)
|
|||||||
/* Mark that a character from this page was loaded */
|
/* Mark that a character from this page was loaded */
|
||||||
pageloaded[code >> MY_UCA_PSHIFT]++;
|
pageloaded[code >> MY_UCA_PSHIFT]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
close_file(file);
|
||||||
|
|
||||||
/* Now set implicit weights */
|
/* Now set implicit weights */
|
||||||
for (code=0; code <= MAX_ALLOWED_CODE; code++)
|
for (code=0; code <= MAX_ALLOWED_CODE; code++)
|
||||||
{
|
{
|
||||||
size_t base, aaaa, bbbb;
|
uint level;
|
||||||
|
|
||||||
if (uca[code].num)
|
if (uca[code].num)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/*
|
for (level= 0; level < 4; level++)
|
||||||
3400;<CJK Ideograph Extension A, First>
|
{
|
||||||
4DB5;<CJK Ideograph Extension A, Last>
|
MY_UCA_IMPLICIT_WEIGHT weight;
|
||||||
4E00;<CJK Ideograph, First>
|
weight= my_uca_520_implicit_weight_on_level(code, level);
|
||||||
9FA5;<CJK Ideograph, Last>
|
uca[code].weight[level][0]= weight.weight[0];
|
||||||
*/
|
uca[code].weight[level][1]= weight.weight[1];
|
||||||
|
}
|
||||||
if (code >= 0x3400 && code <= 0x4DB5)
|
|
||||||
base= 0xFB80;
|
|
||||||
else if (code >= 0x4E00 && code <= 0x9FA5)
|
|
||||||
base= 0xFB40;
|
|
||||||
else
|
|
||||||
base= 0xFBC0;
|
|
||||||
|
|
||||||
aaaa= base + (code >> 15);
|
|
||||||
bbbb= (code & 0x7FFF) | 0x8000;
|
|
||||||
uca[code].weight[0][0]= aaaa;
|
|
||||||
uca[code].weight[0][1]= bbbb;
|
|
||||||
|
|
||||||
uca[code].weight[1][0]= 0x0020;
|
|
||||||
uca[code].weight[1][1]= 0x0000;
|
|
||||||
|
|
||||||
uca[code].weight[2][0]= 0x0002;
|
|
||||||
uca[code].weight[2][1]= 0x0000;
|
|
||||||
|
|
||||||
uca[code].weight[3][0]= 0x0001;
|
|
||||||
uca[code].weight[3][2]= 0x0000;
|
|
||||||
|
|
||||||
uca[code].num= 2;
|
uca[code].num= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -184,7 +269,7 @@ int main(int ac, char **av)
|
|||||||
printf("#define MY_UCA_CMASK %d\n",MY_UCA_CMASK);
|
printf("#define MY_UCA_CMASK %d\n",MY_UCA_CMASK);
|
||||||
printf("#define MY_UCA_PSHIFT %d\n",MY_UCA_PSHIFT);
|
printf("#define MY_UCA_PSHIFT %d\n",MY_UCA_PSHIFT);
|
||||||
|
|
||||||
for (w=0; w<3; w++)
|
for (w=0; w < options.levels; w++)
|
||||||
{
|
{
|
||||||
size_t page;
|
size_t page;
|
||||||
int pagemaxlen[MY_UCA_NPAGES];
|
int pagemaxlen[MY_UCA_NPAGES];
|
||||||
@ -259,7 +344,7 @@ int main(int ac, char **av)
|
|||||||
default: mchars= uca[code].num;
|
default: mchars= uca[code].num;
|
||||||
}
|
}
|
||||||
|
|
||||||
pagemaxlen[page]= maxnum;
|
pagemaxlen[page]= (int) maxnum;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -268,12 +353,12 @@ int main(int ac, char **av)
|
|||||||
|
|
||||||
|
|
||||||
printf("static const uint16 %s%s%03X%s[]= { /* %04X (%d weights per char) */\n",
|
printf("static const uint16 %s%s%03X%s[]= { /* %04X (%d weights per char) */\n",
|
||||||
global_name_prefix, pname_prefix[w], (int) page, pname_suffix[w],
|
options.name_prefix, pname_prefix[w], (int) page, pname_suffix[w],
|
||||||
(int) page*MY_UCA_NCHARS, (int) maxnum);
|
(int) page*MY_UCA_NCHARS, (int) maxnum);
|
||||||
|
|
||||||
for (offs=0; offs < MY_UCA_NCHARS; offs++)
|
for (offs=0; offs < MY_UCA_NCHARS; offs++)
|
||||||
{
|
{
|
||||||
uint16 weight[8];
|
uint16 weight[MY_UCA_MAX_WEIGHT_SIZE];
|
||||||
size_t num, i;
|
size_t num, i;
|
||||||
|
|
||||||
code= page*MY_UCA_NCHARS+offs;
|
code= page*MY_UCA_NCHARS+offs;
|
||||||
@ -324,7 +409,7 @@ int main(int ac, char **av)
|
|||||||
}
|
}
|
||||||
|
|
||||||
printf("const uchar %s_length%s[%d]={\n",
|
printf("const uchar %s_length%s[%d]={\n",
|
||||||
global_name_prefix, pname_suffix[w], MY_UCA_NPAGES);
|
options.name_prefix, pname_suffix[w], MY_UCA_NPAGES);
|
||||||
for (page=0; page < MY_UCA_NPAGES; page++)
|
for (page=0; page < MY_UCA_NPAGES; page++)
|
||||||
{
|
{
|
||||||
printf("%d%s%s",pagemaxlen[page],page<MY_UCA_NPAGES-1?",":"",(page+1) % 16 ? "":"\n");
|
printf("%d%s%s",pagemaxlen[page],page<MY_UCA_NPAGES-1?",":"",(page+1) % 16 ? "":"\n");
|
||||||
@ -333,7 +418,7 @@ int main(int ac, char **av)
|
|||||||
|
|
||||||
|
|
||||||
printf("static const uint16 *%s_weight%s[%d]={\n",
|
printf("static const uint16 *%s_weight%s[%d]={\n",
|
||||||
global_name_prefix, pname_suffix[w], MY_UCA_NPAGES);
|
options.name_prefix, pname_suffix[w], MY_UCA_NPAGES);
|
||||||
for (page=0; page < MY_UCA_NPAGES; page++)
|
for (page=0; page < MY_UCA_NPAGES; page++)
|
||||||
{
|
{
|
||||||
const char *comma= page < MY_UCA_NPAGES-1 ? "," : "";
|
const char *comma= page < MY_UCA_NPAGES-1 ? "," : "";
|
||||||
@ -342,7 +427,7 @@ int main(int ac, char **av)
|
|||||||
printf("NULL %s%s%s", w ? " ": "", comma , nline);
|
printf("NULL %s%s%s", w ? " ": "", comma , nline);
|
||||||
else
|
else
|
||||||
printf("%s%s%03X%s%s%s",
|
printf("%s%s%03X%s%s%s",
|
||||||
global_name_prefix, pname_prefix[w], (int) page, pname_suffix[w],
|
options.name_prefix, pname_prefix[w], (int) page, pname_suffix[w],
|
||||||
comma, nline);
|
comma, nline);
|
||||||
}
|
}
|
||||||
printf("};\n");
|
printf("};\n");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user