зеркало из
https://github.com/iharh/notes.git
synced 2025-10-29 12:46:06 +02:00
279 строки
8.2 KiB
Plaintext
279 строки
8.2 KiB
Plaintext
// io.c
|
|
|
|
/*
 * Open `filename` for reading one or more networks.
 *
 * A fresh buffer handle is created with io_init() and the file is loaded
 * into it with io_gz_file_to_mem(). Returns the handle as an opaque
 * fsm_read_binary_handle, or NULL (after releasing the handle) when
 * io_gz_file_to_mem() reports 0 bytes.
 */
fsm_read_binary_handle fsm_read_binary_file_multiple_init(char *filename) {
    struct io_buf_handle *handle = io_init();

    if (io_gz_file_to_mem(handle, filename) != 0) {
        /* Loaded: hand the buffer handle back to the caller as an opaque pointer. */
        return (void *) handle;
    }

    /* Nothing was loaded: do not leak the handle. */
    io_free(handle);
    return NULL;
}
|
|
|
|
/*
 * Read the next network from a handle obtained from
 * fsm_read_binary_file_multiple_init().
 *
 * Returns the network on success. When io_net_read() returns NULL the
 * handle is released with io_free() and NULL is returned; the handle must
 * not be used again after that.
 *
 * The network name reported by io_net_read() is not part of this API and is
 * always released here.
 */
struct fsm *fsm_read_binary_file_multiple(fsm_read_binary_handle fsrh) {
    /* Start from NULL so we can tell whether io_net_read() stored a name. */
    char *net_name = NULL;
    struct fsm *net;
    struct io_buf_handle *iobh;

    iobh = (struct io_buf_handle *) fsrh;
    net = io_net_read(iobh, &net_name);

    /* io_net_read() may have duplicated the name even though it failed
       later on; free it on both paths so it cannot leak. */
    if (net_name != NULL) {
        xxfree(net_name);
        net_name = NULL;
    }

    if (net == NULL) {
        io_free(iobh);
        return(NULL);
    }
    return(net);
}
|
|
|
|
/* The file format we use is an extremely simple text format */
|
|
/* which is gzip compressed through libz and consists of the following sections: */
|
|
|
|
/* ##foma-net VERSION##*/
|
|
/* ##props## */
|
|
/* PROPERTIES LINE */
|
|
/* ##sigma## */
|
|
/* ...SIGMA LINES... */
|
|
/* ##states## */
|
|
/* ...TRANSITION LINES... */
|
|
/* ##end## */
|
|
|
|
/* Several networks may be concatenated in one file */
|
|
|
|
/* The initial identifier is "##foma-net 1.0##" */
|
|
/* where 1.0 is the version number for the file format */
|
|
/* followed by the line "##props##" */
|
|
/* which is followed by a line of space separated integers */
|
|
/* which correspond to: */
|
|
|
|
/* arity arccount statecount linecount finalcount pathcount is_deterministic */
|
|
/* is_pruned is_minimized is_epsilon_free is_loop_free is_completed name */
|
|
|
|
/* where name is used if defined networks are saved/loaded */
|
|
|
|
/* Following the props line, we accept anything (for future expansion) */
|
|
/* until we find ##sigma## */
|
|
|
|
/* the section beginning with "##sigma##" consists of lines with two fields: */
|
|
/* number string */
|
|
/* corresponding to the symbol number and the symbol string */
|
|
|
|
/* the section beginning with "##states##" consists of lines of ASCII integers */
|
|
/* with 2-5 fields to avoid some redundancy in every line corresponding to a */
|
|
/* transition where otherwise state numbers would be unnecessarily repeated and */
|
|
/* out symbols also (if in = out as is the case for recognizers/simple automata) */
|
|
|
|
/* The information depending on the number of fields in the lines is as follows: */
|
|
|
|
/* 2: in target (here state_no is the same as the last mentioned one and out = in) */
|
|
/* 3: in out target (again, state_no is the same as the last mentioned one) */
|
|
/* 4: state_no in target final_state (where out = in) */
|
|
/* 5: state_no in out target final_state */
|
|
|
|
/* There is no harm in always using 5 fields; however this will take up more space */
|
|
|
|
/* As in struct fsm_state, states without transitions are represented as a 4-field: */
|
|
/* state_no -1 -1 final_state (since in=out for 4-field lines, out = -1 as well) */
|
|
|
|
/* As gzopen will read uncompressed files as well, one can gunzip a file */
|
|
/* that contains a network and still read it */
|
|
|
|
struct fsm *io_net_read(struct io_buf_handle *iobh, char **net_name) {
|
|
|
|
char buf[READ_BUF_SIZE];
|
|
struct fsm *net;
|
|
struct fsm_state *fsm;
|
|
|
|
char *new_symbol;
|
|
int i, items, new_symbol_number, laststate, lineint[5], *cm;
|
|
int extras;
|
|
char last_final = '1';
|
|
|
|
if (io_gets(iobh, buf) == 0) {
|
|
return NULL;
|
|
}
|
|
|
|
net = fsm_create("");
|
|
|
|
if (strcmp(buf, "##foma-net 1.0##") != 0) {
|
|
fsm_destroy(net);
|
|
perror("File format error foma!\n");
|
|
return NULL;
|
|
}
|
|
io_gets(iobh, buf);
|
|
if (strcmp(buf, "##props##") != 0) {
|
|
perror("File format error props!\n");
|
|
fsm_destroy(net);
|
|
return NULL;
|
|
}
|
|
/* Properties */
|
|
io_gets(iobh, buf);
|
|
extras = 0;
|
|
sscanf(buf, "%i %i %i %i %i %lld %i %i %i %i %i %i %s", &net->arity, &net->arccount, &net->statecount, &net->linecount, &net->finalcount, &net->pathcount, &net->is_deterministic, &net->is_pruned, &net->is_minimized, &net->is_epsilon_free, &net->is_loop_free, &extras, buf);
|
|
strcpy(net->name, buf);
|
|
*net_name = xxstrdup(buf);
|
|
io_gets(iobh, buf);
|
|
|
|
net->is_completed = (extras & 3);
|
|
net->arcs_sorted_in = (extras & 12) >> 2;
|
|
net->arcs_sorted_out = (extras & 48) >> 4;
|
|
|
|
/* Sigma */
|
|
while (strcmp(buf, "##sigma##") != 0) { /* Loop until we encounter ##sigma## */
|
|
if (buf[0] == '\0') {
|
|
printf("File format error at sigma definition!\n");
|
|
fsm_destroy(net);
|
|
return NULL;
|
|
}
|
|
io_gets(iobh, buf);
|
|
}
|
|
|
|
for (;;) {
|
|
io_gets(iobh, buf);
|
|
if (buf[0] == '#') break;
|
|
if (buf[0] == '\0') continue;
|
|
new_symbol = strstr(buf, " ");
|
|
new_symbol[0] = '\0';
|
|
new_symbol++;
|
|
if (new_symbol[0] == '\0') {
|
|
sscanf(buf,"%i", &new_symbol_number);
|
|
sigma_add_number(net->sigma, "\n", new_symbol_number);
|
|
} else {
|
|
sscanf(buf,"%i", &new_symbol_number);
|
|
sigma_add_number(net->sigma, new_symbol, new_symbol_number);
|
|
}
|
|
}
|
|
|
|
/* States */
|
|
if (strcmp(buf, "##states##") != 0) {
|
|
printf("File format error!\n");
|
|
return NULL;
|
|
}
|
|
net->states = xxmalloc(net->linecount*sizeof(struct fsm_state));
|
|
fsm = net->states;
|
|
laststate = -1;
|
|
for (i=0; ;i++) {
|
|
io_gets(iobh, buf);
|
|
if (buf[0] == '#') break;
|
|
|
|
/* scanf is just too slow here */
|
|
|
|
//items = sscanf(buf, "%i %i %i %i %i",&lineint[0], &lineint[1], &lineint[2], &lineint[3], &lineint[4]);
|
|
|
|
items = explode_line(buf, &lineint[0]);
|
|
|
|
switch (items) {
|
|
case 2:
|
|
(fsm+i)->state_no = laststate;
|
|
(fsm+i)->in = lineint[0];
|
|
(fsm+i)->out = lineint[0];
|
|
(fsm+i)->target = lineint[1];
|
|
(fsm+i)->final_state = last_final;
|
|
break;
|
|
case 3:
|
|
(fsm+i)->state_no = laststate;
|
|
(fsm+i)->in = lineint[0];
|
|
(fsm+i)->out = lineint[1];
|
|
(fsm+i)->target = lineint[2];
|
|
(fsm+i)->final_state = last_final;
|
|
break;
|
|
case 4:
|
|
(fsm+i)->state_no = lineint[0];
|
|
(fsm+i)->in = lineint[1];
|
|
(fsm+i)->out = lineint[1];
|
|
(fsm+i)->target = lineint[2];
|
|
(fsm+i)->final_state = lineint[3];
|
|
laststate = lineint[0];
|
|
last_final = lineint[3];
|
|
break;
|
|
case 5:
|
|
(fsm+i)->state_no = lineint[0];
|
|
(fsm+i)->in = lineint[1];
|
|
(fsm+i)->out = lineint[2];
|
|
(fsm+i)->target = lineint[3];
|
|
(fsm+i)->final_state = lineint[4];
|
|
laststate = lineint[0];
|
|
last_final = lineint[4];
|
|
break;
|
|
default:
|
|
printf("File format error\n");
|
|
return NULL;
|
|
}
|
|
if (laststate > 0) {
|
|
(fsm+i)->start_state = 0;
|
|
} else if (laststate == -1) {
|
|
(fsm+i)->start_state = -1;
|
|
} else {
|
|
(fsm+i)->start_state = 1;
|
|
}
|
|
|
|
}
|
|
if (strcmp(buf, "##cmatrix##") == 0) {
|
|
cmatrix_init(net);
|
|
cm = net->medlookup->confusion_matrix;
|
|
for (;;) {
|
|
io_gets(iobh, buf);
|
|
if (buf[0] == '#') break;
|
|
sscanf(buf,"%i", &i);
|
|
*cm = i;
|
|
cm++;
|
|
}
|
|
}
|
|
if (strcmp(buf, "##end##") != 0) {
|
|
printf("File format error!\n");
|
|
return NULL;
|
|
}
|
|
return(net);
|
|
}
|
|
|
|
static int io_gets(struct io_buf_handle *iobh, char *target) {
|
|
int i;
|
|
for (i = 0; *((iobh->io_buf_ptr)+i) != '\n' && *((iobh->io_buf_ptr)+i) != '\0'; i++) {
|
|
*(target+i) = *((iobh->io_buf_ptr)+i);
|
|
}
|
|
*(target+i) = '\0';
|
|
if (*((iobh->io_buf_ptr)+i) == '\0')
|
|
(iobh->io_buf_ptr) = (iobh->io_buf_ptr) + i;
|
|
else
|
|
(iobh->io_buf_ptr) = (iobh->io_buf_ptr) + i + 1;
|
|
|
|
return(i);
|
|
}
|
|
|
|
size_t io_gz_file_to_mem(struct io_buf_handle *iobh, char *filename) {
|
|
|
|
size_t size;
|
|
gzFile FILE;
|
|
|
|
size = io_get_file_size(filename);
|
|
if (size == 0) {
|
|
return 0;
|
|
}
|
|
(iobh->io_buf) = xxmalloc((size+1)*sizeof(char));
|
|
FILE = gzopen(filename, "rb");
|
|
gzread(FILE, iobh->io_buf, size);
|
|
gzclose(FILE);
|
|
*((iobh->io_buf)+size) = '\0';
|
|
iobh->io_buf_ptr = iobh->io_buf;
|
|
return(size);
|
|
}
|
|
|
|
struct io_buf_handle *io_init() {
|
|
struct io_buf_handle *iobh;
|
|
iobh = xxmalloc(sizeof(struct io_buf_handle));
|
|
(iobh->io_buf) = NULL;
|
|
(iobh->io_buf_ptr) = NULL;
|
|
return(iobh);
|
|
}
|
|
|
|
void io_free(struct io_buf_handle *iobh) {
|
|
if (iobh->io_buf != NULL) {
|
|
xxfree(iobh->io_buf);
|
|
(iobh->io_buf) = NULL;
|
|
}
|
|
xxfree(iobh);
|
|
}
|