notes/science/nlp/apply.txt
Ihar Hancharenka 5dff80e88e first
2023-03-27 16:52:17 +03:00

255 строки
6.3 KiB
Plaintext

// fomalibconf.h
/*
 * Per-network state for one "apply" session. A handle is created by
 * apply_init() for a given struct fsm and then passed to the apply_*
 * routines (e.g. apply_up() / apply_updown()).
 *
 * Comments below describe only what the visible initialisation and lookup
 * code establishes; fields with no comment are not touched by that code.
 */
struct apply_handle {
int ptr;
int curr_ptr;
int ipos;
int opos;
/* Direction of application; apply_up() sets this to UP. */
int mode;
/* Set to 1 by apply_init(). */
int printcount;
/* Per-state arrays of net->statecount ints, built by apply_create_statemap(): */
/* numlines[s]: number of entries in net->states whose state_no is s. */
int *numlines;
/* statemap[s]: index of the first entry in net->states for state s, or -1 if none. */
int *statemap;
/* marks[s]: initialised to 0 for every state. */
int *marks;
/* Trie over sigma symbols; the first array has 256 entries. */
/* NOTE(review): presumably one slot per byte value -- confirm in apply_add_sigma_trie(). */
struct sigma_trie {
int signum;
struct sigma_trie *next;
} *sigma_trie;
/* Allocated with 1024 zeroed entries at init; per the source comment it is resized later if necessary. */
struct sigmatch_array {
int signumber ;
int consumes ;
} *sigmatch_array;
/* Linked list of every allocated trie array, kept so they can be freed later. */
struct sigma_trie_arrays {
struct sigma_trie *arr;
struct sigma_trie_arrays *next;
} *sigma_trie_arrays;
/* apply_up() sets this to 1 when last_net->arcs_sorted_out == 1, else 0. */
int binsearch;
/* apply_up() sets this to 1 when index_out is non-NULL, else 0. */
int indexed;
int state_has_index;
/* sigma_max(net->sigma) + 1. */
int sigma_size;
/* Number of entries currently allocated in sigmatch_array (1024 at init). */
int sigmatch_array_size;
int current_instring_length;
/* 1 if at least one sigma symbol satisfies flag_check(), else 0. */
int has_flags;
/* Option defaults set in apply_init(): obey_flags = 1, show_flags = 0, print_space = 0. */
int obey_flags;
int show_flags;
int print_space;
char *space_symbol;
/* Heap copy (strdup) of ":" by default. */
char *separator;
/* Heap copy (strdup) of "0" by default; also used as the printed symbol for EPSILON in sigs. */
char *epsilon_symbol;
/* Set to 0 in apply_init(). */
int print_pairs;
/* Reset to 0 by apply_stack_clear(). */
int apply_stack_ptr;
/* Set to DEFAULT_STACK_SIZE, the number of searchstack entries allocated at init. */
int apply_stack_top;
int oldflagneg;
/* Set to DEFAULT_OUTSTRING_SIZE, the number of bytes allocated for outstring at init. */
int outstringtop;
/* apply_updown() sets this to 1 when called with word == NULL, and to 0 otherwise. */
int iterate_old;
/* Set to 0 in apply_init(). */
int iterator;
uint8_t *flagstates;
/* Output buffer; initialised to the empty string. */
char *outstring;
/* Points at the caller's word (assigned in apply_updown(), not copied). */
char *instring;
/* Indexed by sigma number: the symbol string and its strlen(). */
struct sigs {
char *symbol;
int length;
} *sigs;
char *oldflagvalue;
/* The network this handle was initialised for. */
struct fsm *last_net;
/* Cached net->states. */
struct fsm_state *gstates;
/* Cached net->sigma. */
struct sigma *gsigma;
struct apply_state_index {
int fsmptr;
struct apply_state_index *next;
} **index_in, **index_out, *iptr;
/* Set to NULL at init; apply_add_flag() is called once per flag symbol found in sigma. */
struct flag_list {
char *name;
char *value;
short neg;
struct flag_list *next;
} *flag_list;
/* Only allocated when has_flags is set. Indexed by sigma number; */
/* type/name/value come from flag_get_type/name/value(), and stay 0/NULL for non-flag symbols. */
struct flag_lookup {
int type;
char *name;
char *value;
} *flag_lookup ;
/* Array of DEFAULT_STACK_SIZE entries allocated at init. */
struct searchstack {
int offset;
struct apply_state_index *iptr;
int state_has_index;
int opos;
int ipos;
int visitmark;
char *flagname;
char *flagvalue;
int flagneg;
} *searchstack ;
};
// apply.c
/*
 * Allocate and initialise an apply handle for `net`.
 *
 * The handle caches net->states and net->sigma, builds the per-state maps
 * (apply_create_statemap), allocates the output buffer and search stack,
 * and builds the sigma tables (apply_create_sigarray).
 *
 * Side effect: reseeds the C library PRNG with the current time.
 *
 * Returns the new handle, or NULL if `net` is NULL or if the handle itself
 * or its default separator/epsilon strings cannot be allocated. Ownership
 * of the handle passes to the caller.
 */
struct apply_handle *apply_init(struct fsm *net) {
    struct apply_handle *h;

    /* net is dereferenced below (states, sigma); refuse a NULL network. */
    if (net == NULL) {
        return NULL;
    }

    srand((unsigned int) time(NULL));

    h = calloc(1, sizeof(*h));
    if (h == NULL) {
        return NULL;
    }

    /* Init */
    h->iterate_old = 0;
    h->iterator = 0;
    h->instring = NULL;
    h->flag_list = NULL;
    h->flag_lookup = NULL;

    h->obey_flags = 1;
    h->show_flags = 0;
    h->print_space = 0;
    h->print_pairs = 0;

    h->separator = strdup(":");
    h->epsilon_symbol = strdup("0");
    if (h->separator == NULL || h->epsilon_symbol == NULL) {
        /* strdup can fail; epsilon_symbol is passed to strlen() later on. */
        free(h->separator);
        free(h->epsilon_symbol);
        free(h);
        return NULL;
    }

    h->last_net = net;

    h->outstring = xxmalloc(sizeof(char)*DEFAULT_OUTSTRING_SIZE);
    h->outstringtop = DEFAULT_OUTSTRING_SIZE;
    *(h->outstring) = '\0';

    h->gstates = net->states;
    h->gsigma = net->sigma;
    h->printcount = 1;

    apply_create_statemap(h, net);

    h->searchstack = xxmalloc(sizeof(struct searchstack) * DEFAULT_STACK_SIZE);
    h->apply_stack_top = DEFAULT_STACK_SIZE;
    apply_stack_clear(h);

    /* Must run after gsigma and epsilon_symbol are set: both are read there. */
    apply_create_sigarray(h, net);

    return h;
}
void apply_create_statemap(struct apply_handle *h, struct fsm *net) {
int i;
struct fsm_state *fsm;
fsm = net->states;
h->statemap = xxmalloc(sizeof(int)*net->statecount);
h->marks = xxmalloc(sizeof(int)*net->statecount);
h->numlines = xxmalloc(sizeof(int)*net->statecount);
for (i=0; i < net->statecount; i++) {
*(h->numlines+i) = 0; /* Only needed in binary search */
*(h->statemap+i) = -1;
*(h->marks+i) = 0;
}
for (i=0; (fsm+i)->state_no != -1; i++) {
*(h->numlines+(fsm+i)->state_no) = *(h->numlines+(fsm+i)->state_no)+1;
if (*(h->statemap+(fsm+i)->state_no) == -1) {
*(h->statemap+(fsm+i)->state_no) = i;
}
}
}
/*
 * Rewind h->apply_stack_ptr to 0.
 * Nothing is freed or overwritten; only the stack pointer is reset.
 */
void apply_stack_clear(struct apply_handle *h)
{
    h->apply_stack_ptr = 0;
}
void apply_create_sigarray(struct apply_handle *h, struct fsm *net) {
struct sigma *sig;
int i, maxsigma;
maxsigma = sigma_max(net->sigma);
h->sigma_size = maxsigma+1;
// Default size created at init, resized later if necessary
h->sigmatch_array = xxcalloc(1024,sizeof(struct sigmatch_array));
h->sigmatch_array_size = 1024;
h->sigs = xxmalloc(sizeof(struct sigs)*(maxsigma+1));
h->has_flags = 0;
h->flag_list = NULL;
/* Malloc first array of trie and store trie ptrs to be able to free later */
/* when apply_clear() is called. */
h->sigma_trie = xxcalloc(256,sizeof(struct sigma_trie));
h->sigma_trie_arrays = xxmalloc(sizeof(struct sigma_trie_arrays));
h->sigma_trie_arrays->arr = h->sigma_trie;
h->sigma_trie_arrays->next = NULL;
for (i=0;i<256;i++)
(h->sigma_trie+i)->next = NULL;
for (sig = h->gsigma; sig != NULL && sig->number != -1; sig = sig->next) {
if (flag_check(sig->symbol)) {
h->has_flags = 1;
apply_add_flag(h, flag_get_name(sig->symbol));
}
(h->sigs+(sig->number))->symbol = sig->symbol;
(h->sigs+(sig->number))->length = strlen(sig->symbol);
/* Add sigma entry to trie */
if (sig->number > IDENTITY) {
apply_add_sigma_trie(h, sig->number, sig->symbol, (h->sigs+(sig->number))->length);
}
}
if (maxsigma >= IDENTITY) {
(h->sigs+EPSILON)->symbol = h->epsilon_symbol;
(h->sigs+EPSILON)->length = strlen(h->epsilon_symbol);
(h->sigs+UNKNOWN)->symbol = "?";
(h->sigs+UNKNOWN)->length = 1;
(h->sigs+IDENTITY)->symbol = "@";
(h->sigs+IDENTITY)->length = 1;
}
if (h->has_flags) {
h->flag_lookup = xxmalloc(sizeof(struct flag_lookup)*(maxsigma+1));
for (i=0; i <= maxsigma; i++) {
(h->flag_lookup+i)->type = 0;
(h->flag_lookup+i)->name = NULL;
(h->flag_lookup+i)->value = NULL;
}
for (sig = h->gsigma; sig != NULL ; sig = sig->next) {
if (flag_check(sig->symbol)) {
(h->flag_lookup+sig->number)->type = flag_get_type(sig->symbol);
(h->flag_lookup+sig->number)->name = flag_get_name(sig->symbol);
(h->flag_lookup+sig->number)->value = flag_get_value(sig->symbol);
}
}
apply_mark_flagstates(h);
}
}
// clb ??? further
/*
 * Apply the network of `h` to `word` in the UP direction.
 *
 * Sets the mode to UP, enables indexed lookup when h->index_out exists,
 * and enables binary search when the network reports
 * arcs_sorted_out == 1; then delegates to apply_updown().
 *
 * A handle whose last_net is NULL is tolerated: binary search is left
 * off and apply_updown() returns NULL for it.
 *
 * Returns whatever apply_updown() returns (NULL when there is no result).
 */
char *apply_up(struct apply_handle *h, char *word) {
    h->mode = UP;
    h->indexed = (h->index_out != NULL) ? 1 : 0;
    /* Guard last_net here: apply_updown() checks it for NULL, but only */
    /* after this line has already run.                                 */
    h->binsearch = (h->last_net != NULL && h->last_net->arcs_sorted_out == 1) ? 1 : 0;
    return apply_updown(h, word);
}
/*
 * Shared driver behind the directional apply calls.
 *
 * Returns NULL immediately when the handle has no network or the network
 * has no final states.
 *
 * word == NULL: sets iterate_old and calls apply_net() without changing
 *               instring (NOTE(review): presumably continues enumerating
 *               results for the previous input -- confirm in apply_net()).
 * word != NULL: clears iterate_old, points instring at `word` (no copy is
 *               made), rebuilds the sigmatch data, force-clears the
 *               stack and calls apply_net().
 *
 * Returns the value of apply_net().
 */
char *apply_updown(struct apply_handle *h, char *word) {
    if (h->last_net == NULL || h->last_net->finalcount == 0) {
        return NULL;
    }

    if (word == NULL) {
        h->iterate_old = 1;
        return apply_net(h); // clb???
    }

    h->iterate_old = 0;
    h->instring = word;
    apply_create_sigmatch(h); // clb???

    /* Remove old marks if necessary TODO: only pop marks */
    apply_force_clear_stack(h); // clb???

    return apply_net(h);
}