// io.c fsm_read_binary_handle fsm_read_binary_file_multiple_init(char *filename) { struct io_buf_handle *iobh; fsm_read_binary_handle fsm_read_handle; iobh = io_init(); if (io_gz_file_to_mem(iobh, filename) == 0) { io_free(iobh); return NULL; } fsm_read_handle = (void *) iobh; return(fsm_read_handle); } struct fsm *fsm_read_binary_file_multiple(fsm_read_binary_handle fsrh) { char *net_name; struct fsm *net; struct io_buf_handle *iobh; iobh = (struct io_buf_handle *) fsrh; net = io_net_read(iobh, &net_name); if (net == NULL) { io_free(iobh); return(NULL); } else { xxfree(net_name); return(net); } } /* The file format we use is an extremely simple text format */ /* which is gzip compressed through libz and consists of the following sections: */ /* ##foma-net VERSION##*/ /* ##props## */ /* PROPERTIES LINE */ /* ##sigma## */ /* ...SIGMA LINES... */ /* ##states## */ /* ...TRANSITION LINES... */ /* ##end## */ /* Several networks may be concatenated in one file */ /* The initial identifier is "##foma-net 1.0##" */ /* where 1.0 is the version number for the file format */ /* followed by the line "##props##" */ /* which is followed by a line of space separated integers */ /* which correpond to: */ /* arity arccount statecount linecount finalcount pathcount is_deterministic */ /* is_pruned is_minimized is_epsilon_free is_loop_free is_completed name */ /* where name is used if defined networks are saved/loaded */ /* Following the props line, we accept anything (for future expansion) */ /* until we find ##sigma## */ /* the section beginning with "##sigma##" consists of lines with two fields: */ /* number string */ /* correponding to the symbol number and the symbol string */ /* the section beginning with "##states##" consists of lines of ASCII integers */ /* with 2-5 fields to avoid some redundancy in every line corresponding to a */ /* transition where otherwise state numbers would be unnecessarily repeated and */ /* out symbols also (if in = out as is the case for recognizers/simple automata) */ /* The information depending on the number of fields in the lines is as follows: */ /* 2: in target (here state_no is the same as the last mentioned one and out = in) */ /* 3: in out target (again, state_no is the same as the last mentioned one) */ /* 4: state_no in target final_state (where out = in) */ /* 5: state_no in out target final_state */ /* There is no harm in always using 5 fields; however this will take up more space */ /* As in struct fsm_state, states without transitions are represented as a 4-field: */ /* state_no -1 -1 final_state (since in=out for 4-field lines, out = -1 as well) */ /* AS gzopen will read uncompressed files as well, one can gunzip a file */ /* that contains a network and still read it */ struct fsm *io_net_read(struct io_buf_handle *iobh, char **net_name) { char buf[READ_BUF_SIZE]; struct fsm *net; struct fsm_state *fsm; char *new_symbol; int i, items, new_symbol_number, laststate, lineint[5], *cm; int extras; char last_final = '1'; if (io_gets(iobh, buf) == 0) { return NULL; } net = fsm_create(""); if (strcmp(buf, "##foma-net 1.0##") != 0) { fsm_destroy(net); perror("File format error foma!\n"); return NULL; } io_gets(iobh, buf); if (strcmp(buf, "##props##") != 0) { perror("File format error props!\n"); fsm_destroy(net); return NULL; } /* Properties */ io_gets(iobh, buf); extras = 0; sscanf(buf, "%i %i %i %i %i %lld %i %i %i %i %i %i %s", &net->arity, &net->arccount, &net->statecount, &net->linecount, &net->finalcount, &net->pathcount, &net->is_deterministic, &net->is_pruned, &net->is_minimized, &net->is_epsilon_free, &net->is_loop_free, &extras, buf); strcpy(net->name, buf); *net_name = xxstrdup(buf); io_gets(iobh, buf); net->is_completed = (extras & 3); net->arcs_sorted_in = (extras & 12) >> 2; net->arcs_sorted_out = (extras & 48) >> 4; /* Sigma */ while (strcmp(buf, "##sigma##") != 0) { /* Loop until we encounter ##sigma## */ if (buf[0] == '\0') { printf("File format error at sigma definition!\n"); fsm_destroy(net); return NULL; } io_gets(iobh, buf); } for (;;) { io_gets(iobh, buf); if (buf[0] == '#') break; if (buf[0] == '\0') continue; new_symbol = strstr(buf, " "); new_symbol[0] = '\0'; new_symbol++; if (new_symbol[0] == '\0') { sscanf(buf,"%i", &new_symbol_number); sigma_add_number(net->sigma, "\n", new_symbol_number); } else { sscanf(buf,"%i", &new_symbol_number); sigma_add_number(net->sigma, new_symbol, new_symbol_number); } } /* States */ if (strcmp(buf, "##states##") != 0) { printf("File format error!\n"); return NULL; } net->states = xxmalloc(net->linecount*sizeof(struct fsm_state)); fsm = net->states; laststate = -1; for (i=0; ;i++) { io_gets(iobh, buf); if (buf[0] == '#') break; /* scanf is just too slow here */ //items = sscanf(buf, "%i %i %i %i %i",&lineint[0], &lineint[1], &lineint[2], &lineint[3], &lineint[4]); items = explode_line(buf, &lineint[0]); switch (items) { case 2: (fsm+i)->state_no = laststate; (fsm+i)->in = lineint[0]; (fsm+i)->out = lineint[0]; (fsm+i)->target = lineint[1]; (fsm+i)->final_state = last_final; break; case 3: (fsm+i)->state_no = laststate; (fsm+i)->in = lineint[0]; (fsm+i)->out = lineint[1]; (fsm+i)->target = lineint[2]; (fsm+i)->final_state = last_final; break; case 4: (fsm+i)->state_no = lineint[0]; (fsm+i)->in = lineint[1]; (fsm+i)->out = lineint[1]; (fsm+i)->target = lineint[2]; (fsm+i)->final_state = lineint[3]; laststate = lineint[0]; last_final = lineint[3]; break; case 5: (fsm+i)->state_no = lineint[0]; (fsm+i)->in = lineint[1]; (fsm+i)->out = lineint[2]; (fsm+i)->target = lineint[3]; (fsm+i)->final_state = lineint[4]; laststate = lineint[0]; last_final = lineint[4]; break; default: printf("File format error\n"); return NULL; } if (laststate > 0) { (fsm+i)->start_state = 0; } else if (laststate == -1) { (fsm+i)->start_state = -1; } else { (fsm+i)->start_state = 1; } } if (strcmp(buf, "##cmatrix##") == 0) { cmatrix_init(net); cm = net->medlookup->confusion_matrix; for (;;) { io_gets(iobh, buf); if (buf[0] == '#') break; sscanf(buf,"%i", &i); *cm = i; cm++; } } if (strcmp(buf, "##end##") != 0) { printf("File format error!\n"); return NULL; } return(net); } static int io_gets(struct io_buf_handle *iobh, char *target) { int i; for (i = 0; *((iobh->io_buf_ptr)+i) != '\n' && *((iobh->io_buf_ptr)+i) != '\0'; i++) { *(target+i) = *((iobh->io_buf_ptr)+i); } *(target+i) = '\0'; if (*((iobh->io_buf_ptr)+i) == '\0') (iobh->io_buf_ptr) = (iobh->io_buf_ptr) + i; else (iobh->io_buf_ptr) = (iobh->io_buf_ptr) + i + 1; return(i); } size_t io_gz_file_to_mem(struct io_buf_handle *iobh, char *filename) { size_t size; gzFile FILE; size = io_get_file_size(filename); if (size == 0) { return 0; } (iobh->io_buf) = xxmalloc((size+1)*sizeof(char)); FILE = gzopen(filename, "rb"); gzread(FILE, iobh->io_buf, size); gzclose(FILE); *((iobh->io_buf)+size) = '\0'; iobh->io_buf_ptr = iobh->io_buf; return(size); } struct io_buf_handle *io_init() { struct io_buf_handle *iobh; iobh = xxmalloc(sizeof(struct io_buf_handle)); (iobh->io_buf) = NULL; (iobh->io_buf_ptr) = NULL; return(iobh); } void io_free(struct io_buf_handle *iobh) { if (iobh->io_buf != NULL) { xxfree(iobh->io_buf); (iobh->io_buf) = NULL; } xxfree(iobh); }