зеркало из
https://github.com/iharh/notes.git
synced 2025-11-01 14:16:09 +02:00
775 строки
21 KiB
Plaintext
775 строки
21 KiB
Plaintext
[special]
|
|
;
|
|
[nostats]
|
|
;
|
|
; Speech grammar rule table
|
|
;
|
|
[attrs]
|
|
; c is a conjunction
|
|
;
|
|
; chs (currently used)
|
|
p(1) + n = 5 ; chs-only
|
|
n + f(1) = 300 ; both
|
|
n + m(1) = 500 ; both
|
|
n(1) + v = 100 ; both
|
|
n + v(1) = 10 ; both
|
|
r + n(1) = 1000 ; both
|
|
r(1) + n = 100 ; both
|
|
d(1) + r = 100 ; both
|
|
d(1) + v = 100 ; both
|
|
v(1) + r = 100 ; both
|
|
n + m(1) = 500 ; both
|
|
v + f(1) = 30 ; both
|
|
v(1) + m = 100 ; both
|
|
v(1) + n = 3 ; =3 for chs, =5 for cht
|
|
a + u(1) = 5 ; both
|
|
v + n(1) = 5 ; both
|
|
u(1) + a = 2 ; chs-only
|
|
ns + *(1) = 10 ; chs-only
|
|
c(1) + * = 50 ; both
|
|
* + c(1) = 50 ; both
|
|
;
|
|
; cht (traditional)
|
|
v(1) + n = 5 ; cht-only
|
|
|
|
|
|
// config.h
|
|
|
|
#define SCWS_ATTR_LEN 20
|
|
|
|
// xdict.h
|
|
|
|
/* data structure for word(12bytes) */
|
|
typedef struct scws_word
|
|
{
|
|
float tf;
|
|
float idf;
|
|
unsigned char flag;
|
|
char attr[SCWS_ATTR_LEN + 1]; // POS - https://github.com/hightman/scws/blob/master/API.md
|
|
} word_st, *word_t;
|
|
|
|
|
|
// rule.h
|
|
|
|
#define SCWS_RULE_MAX 32
|
|
#define SCWS_RULE_SPECIAL 0x80000000
|
|
#define SCWS_RULE_NOSTATS 0x40000000
|
|
|
|
/* flag: 0x00 ~ 0x4000 */
|
|
#define SCWS_ZRULE_NONE 0x00
|
|
#define SCWS_ZRULE_PREFIX 0x01
|
|
#define SCWS_ZRULE_SUFFIX 0x02
|
|
#define SCWS_ZRULE_INCLUDE 0x04 /* with include */
|
|
#define SCWS_ZRULE_EXCLUDE 0x08 /* with exclude */
|
|
#define SCWS_ZRULE_RANGE 0x10 /* with znum range */
|
|
|
|
/* data structure */
|
|
typedef struct scws_rule_item // section
|
|
{
|
|
short flag;
|
|
char zmin;
|
|
char zmax;
|
|
char name[17]; // section name
|
|
char attr[SCWS_ATTR_LEN + 1]; // default - "un"
|
|
float tf; // default - 5.0
|
|
float idf; // default - 3.5
|
|
unsigned int bit; /* my bit */
|
|
unsigned int inc; /* include */
|
|
unsigned int exc; /* exclude */
|
|
} *rule_item_t;
|
|
|
|
/* special attrs ratio list(single chain, 12bytes) */
|
|
typedef struct scws_rule_attr *rule_attr_t;
|
|
struct scws_rule_attr
|
|
{
|
|
char attr1[SCWS_ATTR_LEN + 1]; // ? v1
|
|
char attr2[SCWS_ATTR_LEN + 1]; // ? v2
|
|
unsigned char npath[2]; // ?? 0xff - by default, (1) -> 0, else -> 0xff
|
|
// used by rule.c
|
|
// - scws_rule_new,
|
|
// - int scws_rule_attr_ratio(rule_t r, const char *attr1, const char *attr2, const unsigned char *npath)
|
|
// // get rule attr x
|
|
// -- scws.c - _scws_mseg_zone,
|
|
// --- static void _scws_msegment(scws_t s, int end, int zlen)
|
|
// ---- scws_res_t scws_get_result(scws_t s)
|
|
|
|
short ratio; // ? = <num>
|
|
rule_attr_t next; // ptr to the next scws_rule_attr
|
|
};
|
|
|
|
typedef struct scws_rule
|
|
{
|
|
xtree_t tree;
|
|
rule_attr_t attr; // [attrs] section: ptr to the first item of the attrs list
|
|
struct scws_rule_item items[SCWS_RULE_MAX]; // SCWS_RULE_MAX is 32
|
|
} rule_st, *rule_t;
|
|
|
|
/* scws ruleset: api */
|
|
|
|
/* create & load ruleset, by fpath & charset */
|
|
rule_t scws_rule_new(const char *fpath, unsigned char *mblen);
|
|
|
|
/* free the memory & resource for ruleset */
|
|
void scws_rule_free(rule_t r);
|
|
|
|
/* get the rule tree record by str */
|
|
rule_item_t scws_rule_get(rule_t r, const char *str, int len);
|
|
|
|
/* check bit */
|
|
int scws_rule_checkbit(rule_t r, const char *str, int len, unsigned int bit);
|
|
|
|
/* get rule attr x */
|
|
int scws_rule_attr_ratio(rule_t r, const char *attr1, const char *attr2, const unsigned char *npath);
|
|
|
|
/* check exclude or include */
|
|
int scws_rule_check(rule_t r, rule_item_t cr, const char *str, int len);
|
|
|
|
/*
 * Look up a section by name (case-insensitive) in r->items.
 *
 * The table is filled front to back, so the scan stops at the first item
 * whose name is empty.  Returns the item index, or -1 when no section with
 * that name exists.
 */
static inline int _rule_index_get(rule_t r, const char *name)
{
    int idx = 0;

    while (idx < SCWS_RULE_MAX && r->items[idx].name[0] != '\0')
    {
        if (strcasecmp(r->items[idx].name, name) == 0)
            return idx;
        idx++;
    }
    return -1;
}
|
|
|
|
rule_t scws_rule_new(const char *fpath, unsigned char *mblen)
|
|
{
|
|
FILE *fp;
|
|
rule_t r;
|
|
rule_item_t cr; // cr - current rule item
|
|
int i, j, rbl, aflag; // rbl - read by line, aflag - attrs-section parsing,
|
|
rule_attr_t a,rtail;
|
|
unsigned char buf[512], *str, *ptr, *qtr;
|
|
|
|
/* loaded or open file failed */
|
|
if ((fp = fopen(fpath, "r")) == NULL)
|
|
return NULL;
|
|
|
|
/* alloc the memory */
|
|
r = (rule_t) malloc(sizeof(rule_st));
|
|
memset(r, 0, sizeof(rule_st));
|
|
|
|
/* quick scan to add the name to list */
|
|
i = j = rbl = aflag = 0;
|
|
while (fgets(buf, sizeof(buf)-1, fp))
|
|
{
|
|
if (buf[0] != '[' || !(ptr = strchr(buf, ']'))) // we are interested in [sections] only
|
|
continue;
|
|
|
|
str = buf + 1;
|
|
*ptr = '\0';
|
|
if (ptr == str || (ptr-str) > 15 || !strcasecmp(str, "attrs")) // skip [attrs] section
|
|
continue;
|
|
|
|
if (_rule_index_get(r, str) >= 0) // already in the items list
|
|
continue;
|
|
|
|
// !!! i is the rule index here from 0 to SCWS_RULE_MAX (== 32)
|
|
|
|
strcpy(r->items[i].name, str);
|
|
r->items[i].tf = 5.0; // default tf is 5.0
|
|
r->items[i].idf = 3.5; // default idf is 3.5
|
|
strncpy(r->items[i].attr, "un", 2); // default attr is "un"
|
|
|
|
// special and nostats are just a plain words
|
|
|
|
if (!strcasecmp(str, "special"))
|
|
r->items[i].bit = SCWS_RULE_SPECIAL;
|
|
else if (!strcasecmp(str, "nostats"))
|
|
r->items[i].bit = SCWS_RULE_NOSTATS;
|
|
else
|
|
{
|
|
r->items[i].bit = (1<<j);
|
|
j++;
|
|
}
|
|
|
|
if (++i >= SCWS_RULE_MAX) // all section has already been read
|
|
break;
|
|
}
|
|
|
|
// 2-nd pass
|
|
rewind(fp);
|
|
|
|
/* load the tree data */
|
|
if ((r->tree = xtree_new(0, 1)) == NULL)
|
|
{
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
cr = NULL;
|
|
while (fgets(buf, sizeof(buf)-1, fp))
|
|
{
|
|
if (buf[0] == ';') // comment
|
|
continue;
|
|
|
|
if (buf[0] == '[')
|
|
{
|
|
cr = NULL;
|
|
str = buf + 1;
|
|
aflag = 0;
|
|
if ((ptr = strchr(str, ']')) != NULL)
|
|
{
|
|
*ptr = '\0';
|
|
if (!strcasecmp(str, "attrs"))
|
|
{
|
|
aflag = 1;
|
|
}
|
|
// all the non [attrs] section - need to be read by line
|
|
else if ((i = _rule_index_get(r, str)) >= 0)
|
|
{
|
|
rbl = 1; /* default read by line = yes */
|
|
cr = &r->items[i];
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/* attr flag open? */
|
|
if (aflag == 1) // [attrs] section
|
|
{
|
|
/* parse the attr line */
|
|
|
|
// str - first part (before +)
|
|
// ptr - second (after + before =)
|
|
// qtr - third (after =)
|
|
|
|
str = buf;
|
|
while (*str == ' ' || *str == '\t') str++;
|
|
if ((ptr = strchr(str, '+')) == NULL) continue;
|
|
*ptr++ = '\0';
|
|
if ((qtr = strchr(ptr, '=')) == NULL) continue;
|
|
*qtr++ = '\0';
|
|
|
|
/* create new memory */
|
|
a = (rule_attr_t) malloc(sizeof(struct scws_rule_attr));
|
|
memset(a, 0, sizeof(struct scws_rule_attr));
|
|
|
|
/* get ratio */
|
|
while(*qtr == ' ' || *qtr == '\t') qtr++;
|
|
a->ratio = (short) atoi(qtr);
|
|
if (a->ratio < 1)
|
|
a->ratio = 1;
|
|
a->npath[0] = a->npath[1] = 0xff;
|
|
|
|
/* read attr1 & npath1? */
|
|
a->attr1[0] = *str++;
|
|
if (*str && *str != '(' && *str != ' ' && *str != '\t')
|
|
a->attr1[1] = *str++;
|
|
while (*str && *str != '(') str++;
|
|
if (*str == '(')
|
|
{
|
|
str++;
|
|
if ((qtr = strchr(str, ')')) != NULL)
|
|
{
|
|
*qtr = '\0';
|
|
a->npath[0] = (unsigned char) atoi(str);
|
|
if (a->npath[0] > 0)
|
|
a->npath[0]--;
|
|
else
|
|
a->npath[0] = 0xff;
|
|
}
|
|
}
|
|
|
|
/* read attr2 & npath2? */
|
|
str = ptr;
|
|
while (*str == ' ' || *str == '\t') str++;
|
|
a->attr2[0] = *str++;
|
|
if (*str && *str != '(' && *str != ' ' && *str != '\t')
|
|
a->attr2[1] = *str++;
|
|
while (*str && *str != '(') str++;
|
|
if (*str == '(')
|
|
{
|
|
str++;
|
|
if ((qtr = strchr(str, ')')) != NULL)
|
|
{
|
|
*qtr = '\0';
|
|
a->npath[1] = (unsigned char) atoi(str);
|
|
if (a->npath[1] > 0)
|
|
a->npath[1]--;
|
|
else
|
|
a->npath[1] = 0xff;
|
|
}
|
|
}
|
|
|
|
//printf("%c%c(%d)+%c%c(%d)=%d\n", a->attr1[0], a->attr1[1] ? a->attr1[1] : ' ', a->npath[0],
|
|
// a->attr2[0], a->attr2[1] ? a->attr2[1] : ' ', a->npath[1], a->ratio);
|
|
|
|
/* append to the chain list */
|
|
if (r->attr == NULL)
|
|
r->attr = rtail = a;
|
|
else
|
|
{
|
|
rtail->next = a;
|
|
rtail = a;
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
if (cr == NULL) // cr == NULL for [attrs] section
|
|
continue;
|
|
|
|
/* param set: line|znum|include|exclude|type|tf|idf|attr */
|
|
if (buf[0] == ':')
|
|
{
|
|
str = buf + 1;
|
|
if (!(ptr = strchr(str, '=')))
|
|
continue;
|
|
while (*str == ' ' || *str == '\t') str++;
|
|
|
|
qtr = ptr + 1;
|
|
while (ptr > str && (ptr[-1] == ' ' || ptr[-1] == '\t')) ptr--;
|
|
*ptr = '\0';
|
|
ptr = str;
|
|
str = qtr;
|
|
while (*str == ' ' || *str == '\t') str++;
|
|
|
|
if (!strcmp(ptr, "line"))
|
|
rbl = (*str == 'N' || *str == 'n') ? 0 : 1;
|
|
else if (!strcmp(ptr, "tf"))
|
|
cr->tf = (float) atof(str);
|
|
else if (!strcmp(ptr, "idf"))
|
|
cr->idf = (float) atof(str);
|
|
else if (!strcmp(ptr, "attr"))
|
|
strncpy(cr->attr, str, 2);
|
|
else if (!strcmp(ptr, "znum"))
|
|
{
|
|
if ((ptr = strchr(str, ',')) != NULL)
|
|
{
|
|
*ptr++ = '\0';
|
|
while (*ptr == ' ' || *ptr == '\t') ptr++;
|
|
cr->zmax = atoi(ptr);
|
|
cr->flag |= SCWS_ZRULE_RANGE;
|
|
}
|
|
cr->zmin = atoi(str);
|
|
}
|
|
else if (!strcmp(ptr, "type"))
|
|
{
|
|
if (!strncmp(str, "prefix", 6))
|
|
cr->flag |= SCWS_ZRULE_PREFIX;
|
|
else if (!strncmp(str, "suffix", 6))
|
|
cr->flag |= SCWS_ZRULE_SUFFIX;
|
|
}
|
|
else if (!strcmp(ptr, "include") || !strcmp(ptr, "exclude"))
|
|
{
|
|
unsigned int *clude;
|
|
|
|
if (!strcmp(ptr, "include"))
|
|
{
|
|
clude = &cr->inc;
|
|
cr->flag |= SCWS_ZRULE_INCLUDE;
|
|
}
|
|
else
|
|
{
|
|
clude = &cr->exc;
|
|
cr->flag |= SCWS_ZRULE_EXCLUDE;
|
|
}
|
|
|
|
while ((ptr = strchr(str, ',')) != NULL)
|
|
{
|
|
while (ptr > str && (ptr[-1] == '\t' || ptr[-1] == ' ')) ptr--;
|
|
*ptr = '\0';
|
|
if ((i = _rule_index_get(r, str)) >= 0)
|
|
*clude |= r->items[i].bit;
|
|
|
|
str = ptr + 1;
|
|
while (*str == ' ' || *str == '\t' || *str == ',') str++;
|
|
}
|
|
|
|
ptr = strlen(str) + str;
|
|
while (ptr > str && strchr(" \t\r\n", ptr[-1])) ptr--;
|
|
*ptr = '\0';
|
|
if (ptr > str && (i = _rule_index_get(r, str)))
|
|
*clude |= r->items[i].bit;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/* read the entries */
|
|
str = buf;
|
|
while (*str == ' ' || *str == '\t') str++;
|
|
ptr = str + strlen(str);
|
|
while (ptr > str && strchr(" \t\r\n", ptr[-1])) ptr--;
|
|
*ptr = '\0';
|
|
|
|
/* emptry line */
|
|
if (ptr == str)
|
|
continue;
|
|
|
|
if (rbl) // if read-by-line - xtree_nput(r->tree, cr, ..., str, ptr -str)
|
|
xtree_nput(r->tree, cr, sizeof(struct scws_rule_item), str, ptr - str);
|
|
else
|
|
{
|
|
while (str < ptr)
|
|
{
|
|
j = mblen[(*str)];
|
|
|
|
#ifdef HAVE_NOT_QUIET
|
|
/* try to check repeat */
|
|
if ((i = (int) xtree_nget(r->tree, str, j, NULL)) != 0)
|
|
fprintf(stderr, "Reapeat word on %s|%s: %.*s\n", cr->name, ((rule_item_t) i)->name, j, str);
|
|
#endif
|
|
|
|
xtree_nput(r->tree, cr, sizeof(struct scws_rule_item), str, j);
|
|
str += j;
|
|
}
|
|
}
|
|
}
|
|
fclose(fp);
|
|
|
|
/* optimize the tree */
|
|
xtree_optimize(r->tree);
|
|
return r;
|
|
}
|
|
|
|
/* get rule attr x */
|
|
#define EQUAL_RULE_ATTR(x,y) ((y[0]=='*'&&y[1]=='\0') || (strcmp(x,y)==0))
|
|
#define EQUAL_RULE_NPATH(x,y) ((y[0]==0xff||y[0]==x[0])&&(y[1]==0xff||y[1]==x[1]))
|
|
|
|
// special attrs ratio list(single chain, 12bytes)
|
|
//typedef struct scws_rule_attr *rule_attr_t;
|
|
//struct scws_rule_attr
|
|
//{
|
|
// char attr1[SCWS_ATTR_LEN + 1]; // v1
|
|
// char attr2[SCWS_ATTR_LEN + 1]; // v2
|
|
// unsigned char npath[2]; // 0xff - by default, (1) -> 0, else -> 0xff
|
|
// short ratio; // <num-ratio>
|
|
// rule_attr_t next; // ptr to the next scws_rule_attr
|
|
//};
|
|
|
|
//typedef struct scws_rule {
|
|
// ...
|
|
// rule_attr_t attr; // [attrs] section: ptr to the first item of the attrs list
|
|
// ...
|
|
//} rule_st, *rule_t;
|
|
|
|
int
|
|
scws_rule_attr_ratio(rule_t r, const char *attr1, const char *attr2, const unsigned char *npath)
|
|
{
|
|
rule_attr_t a;
|
|
int ret = 1;
|
|
|
|
if (!r || (a = r->attr) == NULL)
|
|
return ret; // 1 - if there are no rules or [attrs] section is missing
|
|
|
|
// iterate through the [attrs] section items
|
|
while (a != NULL)
|
|
{
|
|
if (EQUAL_RULE_ATTR(attr1, a->attr1) && EQUAL_RULE_ATTR(attr2, a->attr2) && EQUAL_RULE_NPATH(npath, a->npath))
|
|
{
|
|
ret = (int) a->ratio; // found the given attr1/attr2/npath (rule attr1/attr2 could be '*')
|
|
break; // rule npath could be absent (0xff), which is just like a '*'
|
|
}
|
|
a = a->next;
|
|
}
|
|
return ret; // 1 - if the given attr1/attr2/npath not found
|
|
}
|
|
|
|
#undef EQUAL_RULE_ATTR
|
|
#undef EQUAL_RULE_NPATH
|
|
|
|
|
|
// scws.h
|
|
|
|
/* data structures */
|
|
typedef struct scws_result *scws_res_t;
|
|
struct scws_result
|
|
{
|
|
int off; // src text offset
|
|
float idf;
|
|
unsigned char len; // src text length
|
|
char attr[SCWS_ATTR_LEN + 1]; // SCWS_ATTR_LEN == 20
|
|
scws_res_t next;
|
|
};
|
|
|
|
// ...
|
|
|
|
typedef struct
|
|
{
|
|
xdict_t d;
|
|
rule_t r; // rule
|
|
unsigned char *mblen; // ???
|
|
unsigned int mode;
|
|
unsigned char *txt;
|
|
int zis;
|
|
int len;
|
|
int off;
|
|
int wend;
|
|
scws_res_t res0;
|
|
scws_res_t res1;
|
|
word_t **wmap;
|
|
struct scws_zchar *zmap;
|
|
} scws_st, *scws_t;
|
|
|
|
// scws.cpp
|
|
|
|
/*
 * Replace the rule set of s with the one loaded from fpath.
 *
 * A previously attached rule set is released first.  s->r ends up holding
 * whatever scws_rule_new() returned, which is NULL when loading failed.
 */
void scws_set_rule(scws_t s, const char *fpath)
{
    rule_t previous = s->r;

    if (previous != NULL)
        scws_rule_free(previous);

    s->r = scws_rule_new(fpath, s->mblen);
}
|
|
|
|
|
|
/* multibyte segment */
|
|
...
|
|
|
|
/*
 * Segment the zone f..t (inclusive positions): build candidate paths, keep
 * the one with the highest weight and apply it with _scws_mset_word().
 *
 * A path is stored as a list of (end - start) values terminated by 0xff.
 * Its weight is the product of the tf of its words, times (end - start)^4 for
 * every word longer than one position, times the [attrs] ratio of every pair
 * of neighbouring words, divided by (x + sz - 1)^5 where x is the number of
 * words and sz the number of single-position words.
 */
static void _scws_mseg_zone(scws_t s, int f, int t)
{
    unsigned char *mpath, *npath;   /* best path so far / path under construction */
    word_t **wmap;
    int x,i,j,m,n,j2,sz;
    double weight, nweight;
    /* one byte more than is ever copied in, so the tag is always NUL-terminated
       when it is handed to scws_rule_attr_ratio() */
    char attr1[SCWS_ATTR_LEN + 1];

    mpath = npath = NULL;
    weight = nweight = (double) 0.0;

    wmap = s->wmap;
    j2 = 0;
    for (x = i = f; i <= t; i++)
    {
        j = _scws_mget_word(s, i, (x > i ? x - 1 : t));
        if (j == i) continue;
        // skip NR in NR
        if (j < j2 && strcmp(wmap[i][j]->attr, attr_nr) == 0) continue;
        if (i > j2 && (wmap[i][j]->flag & SCWS_WORD_USED)) continue;

        /* one word only */
        if (i == f && j == t)
        {
            mpath = (unsigned char *) malloc(2);
            if (mpath == NULL)
                break;      /* out of memory: leave the zone untouched */
            mpath[0] = j - i;
            mpath[1] = 0xff;
            break;
        }

        if (i != f && (wmap[i][j]->flag & SCWS_WORD_RULE))
            continue;

        /* create the new path */
        wmap[i][j]->flag |= SCWS_WORD_USED;
        nweight = (double) wmap[i][j]->tf * pow(j-i,4);

        if (npath == NULL)
        {
            npath = (unsigned char *) malloc(t-f+2);
            if (npath == NULL)
                break;      /* out of memory: settle for the best path found so far */
            memset(npath, 0xff, t-f+2);
        }

        /* lookfor backward */
        x = sz = 0;
        memset(attr1, 0, sizeof(attr1));
        for (m = f; m < i; m = n+1)
        {
            n = _scws_mget_word(s, m, i-1);
            nweight *= wmap[m][n]->tf;
            npath[x++] = n - m;
            if (n > m)
            {
                nweight *= pow(n-m,4);
                wmap[m][n]->flag |= SCWS_WORD_USED;
            }
            else sz++;

            /* no ratio for the first word of the path: it has no left neighbour */
            if (attr1[0] != '\0')
                nweight *= scws_rule_attr_ratio(s->r, attr1, wmap[m][n]->attr, &npath[x-2]);
            memcpy(attr1, wmap[m][n]->attr, SCWS_ATTR_LEN);
            attr1[SCWS_ATTR_LEN] = '\0';
        }

        /* my self */
        npath[x++] = j - i;

        if (attr1[0] != '\0')
            nweight *= scws_rule_attr_ratio(s->r, attr1, wmap[i][j]->attr, &npath[x-2]);
        memcpy(attr1, wmap[i][j]->attr, SCWS_ATTR_LEN);
        attr1[SCWS_ATTR_LEN] = '\0';

        /* lookfor forward */
        for (m = j+1; m <= t; m = n+1)
        {
            n = _scws_mget_word(s, m, t);
            nweight *= wmap[m][n]->tf;
            npath[x++] = n - m;
            if (n > m)
            {
                nweight *= pow(n-m,4);
                wmap[m][n]->flag |= SCWS_WORD_USED;
            }
            else sz++;

            nweight *= scws_rule_attr_ratio(s->r, attr1, wmap[m][n]->attr, &npath[x-2]);
            memcpy(attr1, wmap[m][n]->attr, SCWS_ATTR_LEN);
            attr1[SCWS_ATTR_LEN] = '\0';
        }

        npath[x] = 0xff;
        nweight /= pow(x+sz-1,5);

        /* draw the path for debug */
#ifdef HAVE_NOT_QUIET
        if (s->mode & SCWS_DEBUG)
        {
            fprintf(stderr, "PATH by keyword = %.*s, (weight=%.4f):\n",
                s->zmap[j].end - s->zmap[i].start, s->txt + s->zmap[i].start, nweight);
            for (x = 0, m = f; (n = npath[x]) != 0xff; x++)
            {
                n += m;
                fprintf(stderr, "%.*s ", s->zmap[n].end - s->zmap[m].start, s->txt + s->zmap[m].start);
                m = n + 1;
            }
            fprintf(stderr, "\n--\n");
        }
#endif

        j2 = x = j;
        /* stay on position i for another round when the word spans more than two
           positions; the next lookup is then capped at x - 1 */
        if (x - i > 1) i--;
        /* check better path */
        if (nweight > weight)
        {
            unsigned char *swap;

            weight = nweight;
            swap = mpath;
            mpath = npath;
            npath = swap;
        }
    }

    /* set the result, mpath != NULL */
    if (mpath == NULL)
    {
        free(npath);    /* a candidate buffer may exist even though no path won */
        return;
    }

    for (x = 0, m = f; (n = mpath[x]) != 0xff; x++)
    {
        n += m;
        _scws_mset_word(s, m, n);
        m = n + 1;
    }

    /* upstream note dated 070808: memory leak fixed */
    free(mpath);
    free(npath);
}
|
|
|
|
|
|
// xtree.h
|
|
|
|
/* pool required */
|
|
#include "pool.h"
|
|
|
|
/* data structure for Hash+Tree: one node of a bucket's binary search tree */
struct tree_node
{
    char *key;                  /* NUL-terminated key, compared by _xtree_node_search() */
    void *value;                /* caller-supplied value pointer (stored as is, not copied) */
    int vlen;                   /* value length passed in with the value */
    struct tree_node *left;     /* keys that compare lower */
    struct tree_node *right;    /* keys that compare higher */
};

typedef struct tree_node node_st;
typedef struct tree_node *node_t;
|
|
|
|
typedef struct
|
|
{
|
|
pool_t p; /* pool for memory manager */
|
|
int base; /* base number for hasher (prime number recommend) */
|
|
int prime; /* good prime number for hasher */
|
|
int count; /* total nodes */
|
|
node_t *trees; /* trees [total=prime+1] */
|
|
} xtree_st, *xtree_t;
|
|
|
|
...
|
|
|
|
// xtree.cpp
|
|
|
|
/* private static functions */
|
|
/*
 * Hash the len bytes of s into a bucket index below xt->prime.
 *
 * Starting from xt->base, the bytes are folded in from the last one to the
 * first: multiply by 33, xor the byte, keep the low 31 bits.
 */
static int _xtree_hasher(xtree_t xt, const char *s, int len)
{
    unsigned int hash = xt->base;

    while (len != 0)
    {
        len--;
        hash *= 33u;    /* same as hash += (hash << 5) */
        hash = (hash ^ (unsigned char) s[len]) & 0x7fffffff;
    }
    return (hash % xt->prime);
}
|
|
|
|
/*
 * Search the binary tree rooted at head for the key made of the first len
 * bytes of key.
 *
 * Returns the matching node, or NULL when the key is absent.  In the latter
 * case, if pnode is not NULL, *pnode receives the address of the empty child
 * link where a node for this key has to be attached.
 *
 * Stored keys are NUL-terminated and may be shorter than len, so the bytes are
 * compared with strncmp(), which never reads past a stored key's terminator
 * (memcmp() over len bytes could).  Equal prefixes are ordered by length.
 * NOTE(review): this treats keys as text; a key with an embedded NUL byte
 * already could not match its stored copy, since strlen() is used below.
 */
static node_t _xtree_node_search(node_t head, node_t **pnode, const char *key, int len)
{
    node_t cur = head;

    /* iterative descent: no stack growth on deep (unbalanced) trees */
    for (;;)
    {
        node_t *next;
        int cmp;

        cmp = strncmp(key, cur->key, (size_t) len);
        if (cmp == 0)
            cmp = len - (int) strlen(cur->key);

        if (cmp == 0)
            return cur;

        next = (cmp > 0 ? &cur->right : &cur->left);
        if (*next == NULL)
        {
            if (pnode != NULL)
                *pnode = next;
            return NULL;
        }
        cur = *next;
    }
}
|
|
|
|
/*
 * Find the node for key/len in the whole container.
 *
 * Picks the bucket (bucket 0 when xt->prime is not above 1, the hash of the
 * key otherwise) and searches that bucket's tree.  Returns the node or NULL;
 * on a miss, when pnode is not NULL, *pnode is set to the link - either the
 * empty bucket slot or an empty child link - where a new node belongs.
 */
static node_t _xtree_node_find(xtree_t xt, node_t **pnode, const char *key, int len)
{
    int slot = 0;
    node_t root;

    if (xt->prime > 1)
        slot = _xtree_hasher(xt, key, len);

    root = xt->trees[slot];
    if (root != NULL)
        return _xtree_node_search(root, pnode, key, len);

    /* empty bucket: a new node would become its root */
    if (pnode != NULL)
        *pnode = &xt->trees[slot];
    return NULL;
}
|
|
|
|
/* public functions */
|
|
|
|
...
|
|
|
|
/*
 * Store value/vlen under the key made of the first len bytes of key.
 *
 * An existing node simply gets the new value and length (a NULL value is
 * stored too).  A new node is only created for a non-NULL value; its key is
 * duplicated from the tree's pool, the value pointer is stored as is.
 * Nothing happens when xt or key is NULL or len is 0.
 */
void xtree_nput(xtree_t xt, void *value, int vlen, const char *key, int len)
{
    node_t hit, fresh, *slot;

    if (xt == NULL || key == NULL || len == 0)
        return;

    hit = _xtree_node_find(xt, &slot, key, len);
    if (hit != NULL)
    {
        /* key already present: overwrite in place */
        hit->value = value;
        hit->vlen = vlen;
        return;
    }

    if (value == NULL)
        return;

    /* attach a new leaf at the link reported by the lookup */
    fresh = (node_t) pmalloc(xt->p, sizeof(node_st));
    *slot = fresh;
    fresh->key = pstrndup(xt->p, key, len);
    fresh->value = value;
    fresh->vlen = vlen;
    fresh->left = NULL;
    fresh->right = NULL;
}
|
|
|
|
/*
 * String convenience wrapper around xtree_nput(): key and value are
 * NUL-terminated strings and their lengths are taken with strlen().
 * A NULL value is passed on with length 0.  Nothing happens when xt or key
 * is NULL.
 */
void xtree_put(xtree_t xt, const char *value, const char *key)
{
    if (xt == NULL || key == NULL)
        return;

    xtree_nput(xt, (void *) value, value ? strlen(value) : 0, key, strlen(key));
}
|
|
|