зеркало из
				https://github.com/iharh/notes.git
				synced 2025-10-31 21:56:08 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			279 строки
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			279 строки
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| // io.c
 | |
| 
 | |
| fsm_read_binary_handle fsm_read_binary_file_multiple_init(char *filename) {
 | |
| 
 | |
|     struct io_buf_handle *iobh;
 | |
|     fsm_read_binary_handle fsm_read_handle;
 | |
| 
 | |
|     iobh = io_init();
 | |
|     if (io_gz_file_to_mem(iobh, filename) == 0) {
 | |
| 	io_free(iobh);
 | |
| 	return NULL;
 | |
|     }
 | |
|     fsm_read_handle = (void *) iobh;
 | |
|     return(fsm_read_handle);
 | |
| }
 | |
| 
 | |
| struct fsm *fsm_read_binary_file_multiple(fsm_read_binary_handle fsrh) {
 | |
|     char *net_name;
 | |
|     struct fsm *net;
 | |
|     struct io_buf_handle *iobh;
 | |
|     iobh = (struct io_buf_handle *) fsrh;
 | |
|     net = io_net_read(iobh, &net_name);
 | |
|     if (net == NULL) {
 | |
| 	io_free(iobh);
 | |
| 	return(NULL);
 | |
|     } else {
 | |
| 	xxfree(net_name);
 | |
| 	return(net);
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* The file format we use is an extremely simple text format */
 | |
| /* which is gzip compressed through libz and consists of the following sections: */
 | |
| 
 | |
| /* ##foma-net VERSION##*/
 | |
| /* ##props## */
 | |
| /* PROPERTIES LINE */
 | |
| /* ##sigma## */
 | |
| /* ...SIGMA LINES... */
 | |
| /* ##states## */
 | |
| /* ...TRANSITION LINES... */ 
 | |
| /* ##end## */
 | |
| 
 | |
| /* Several networks may be concatenated in one file */
 | |
| 
 | |
| /* The initial identifier is "##foma-net 1.0##" */
 | |
| /* where 1.0 is the version number for the file format */
 | |
| /* followed by the line "##props##" */
 | |
| /* which is followed by a line of space separated integers */
 | |
| /* which correspond to: */
 | |
| 
 | |
| /* arity arccount statecount linecount finalcount pathcount is_deterministic */
 | |
| /* is_pruned is_minimized is_epsilon_free is_loop_free is_completed name  */
 | |
| 
 | |
| /* where name is used if defined networks are saved/loaded */
 | |
| 
 | |
| /* Following the props line, we accept anything (for future expansion) */
 | |
| /* until we find ##sigma## */
 | |
| 
 | |
| /* the section beginning with "##sigma##" consists of lines with two fields: */
 | |
| /* number string */
 | |
| /* corresponding to the symbol number and the symbol string */
 | |
| 
 | |
| /* the section beginning with "##states##" consists of lines of ASCII integers */
 | |
| /* with 2-5 fields to avoid some redundancy in every line corresponding to a */
 | |
| /* transition where otherwise state numbers would be unnecessarily repeated and */
 | |
| /* out symbols also (if in = out as is the case for recognizers/simple automata) */
 | |
| 
 | |
| /* The information depending on the number of fields in the lines is as follows: */
 | |
| 
 | |
| /* 2: in target (here state_no is the same as the last mentioned one and out = in) */
 | |
| /* 3: in out target (again, state_no is the same as the last mentioned one) */
 | |
| /* 4: state_no in target final_state (where out = in) */
 | |
| /* 5: state_no in out target final_state */
 | |
| 
 | |
| /* There is no harm in always using 5 fields; however this will take up more space */
 | |
| 
 | |
| /* As in struct fsm_state, states without transitions are represented as a 4-field: */
 | |
| /* state_no -1 -1 final_state (since in=out for 4-field lines, out = -1 as well) */
 | |
| 
 | |
| /* As gzopen will read uncompressed files as well, one can gunzip a file */
 | |
| /* that contains a network and still read it */
 | |
| 
 | |
| struct fsm *io_net_read(struct io_buf_handle *iobh, char **net_name) {
 | |
| 
 | |
|     char buf[READ_BUF_SIZE];
 | |
|     struct fsm *net;
 | |
|     struct fsm_state *fsm;
 | |
|     
 | |
|     char *new_symbol;
 | |
|     int i, items, new_symbol_number, laststate, lineint[5], *cm;
 | |
|     int extras;
 | |
|     char last_final = '1';
 | |
| 
 | |
|     if (io_gets(iobh, buf) == 0) {
 | |
|         return NULL;
 | |
|     }
 | |
|     
 | |
|     net = fsm_create("");
 | |
| 
 | |
|     if (strcmp(buf, "##foma-net 1.0##") != 0) {
 | |
| 	fsm_destroy(net);
 | |
|         perror("File format error foma!\n");
 | |
|         return NULL;
 | |
|     }
 | |
|     io_gets(iobh, buf);
 | |
|     if (strcmp(buf, "##props##") != 0) {
 | |
|         perror("File format error props!\n");
 | |
| 	fsm_destroy(net);
 | |
|         return NULL;
 | |
|     }
 | |
|     /* Properties */
 | |
|     io_gets(iobh, buf);
 | |
|     extras = 0;
 | |
|     sscanf(buf, "%i %i %i %i %i %lld %i %i %i %i %i %i %s", &net->arity, &net->arccount, &net->statecount, &net->linecount, &net->finalcount, &net->pathcount, &net->is_deterministic, &net->is_pruned, &net->is_minimized, &net->is_epsilon_free, &net->is_loop_free, &extras, buf);
 | |
|     strcpy(net->name, buf);
 | |
|     *net_name = xxstrdup(buf);
 | |
|     io_gets(iobh, buf);
 | |
| 
 | |
|     net->is_completed = (extras & 3);
 | |
|     net->arcs_sorted_in = (extras & 12) >> 2;
 | |
|     net->arcs_sorted_out = (extras & 48) >> 4;
 | |
| 
 | |
|     /* Sigma */
 | |
|     while (strcmp(buf, "##sigma##") != 0) { /* Loop until we encounter ##sigma## */
 | |
|         if (buf[0] == '\0') {
 | |
| 	  printf("File format error at sigma definition!\n");
 | |
| 	  fsm_destroy(net);
 | |
| 	  return NULL;
 | |
|         }
 | |
|         io_gets(iobh, buf);
 | |
|     }
 | |
| 
 | |
|     for (;;) {
 | |
|         io_gets(iobh, buf);
 | |
|         if (buf[0] == '#') break;
 | |
|         if (buf[0] == '\0') continue;
 | |
|         new_symbol = strstr(buf, " ");
 | |
| 	new_symbol[0] = '\0';
 | |
| 	new_symbol++;
 | |
| 	if (new_symbol[0] == '\0') {
 | |
| 	    sscanf(buf,"%i", &new_symbol_number);
 | |
| 	    sigma_add_number(net->sigma, "\n", new_symbol_number);
 | |
| 	} else {
 | |
| 	    sscanf(buf,"%i", &new_symbol_number);
 | |
| 	    sigma_add_number(net->sigma, new_symbol, new_symbol_number);
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     /* States */
 | |
|     if (strcmp(buf, "##states##") != 0) {
 | |
|         printf("File format error!\n");
 | |
|         return NULL;
 | |
|     }
 | |
|     net->states = xxmalloc(net->linecount*sizeof(struct fsm_state));
 | |
|     fsm = net->states;
 | |
|     laststate = -1;
 | |
|     for (i=0; ;i++) {
 | |
|         io_gets(iobh, buf);
 | |
|         if (buf[0] == '#') break;
 | |
| 
 | |
|         /* scanf is just too slow here */
 | |
| 
 | |
|         //items = sscanf(buf, "%i %i %i %i %i",&lineint[0], &lineint[1], &lineint[2], &lineint[3], &lineint[4]);
 | |
| 
 | |
|         items = explode_line(buf, &lineint[0]);
 | |
| 
 | |
|         switch (items) {
 | |
|         case 2:
 | |
|             (fsm+i)->state_no = laststate;
 | |
|             (fsm+i)->in = lineint[0];
 | |
|             (fsm+i)->out = lineint[0];
 | |
|             (fsm+i)->target = lineint[1];
 | |
|             (fsm+i)->final_state = last_final;
 | |
|             break;
 | |
|         case 3:
 | |
|             (fsm+i)->state_no = laststate;
 | |
|             (fsm+i)->in = lineint[0];
 | |
|             (fsm+i)->out = lineint[1];
 | |
|             (fsm+i)->target = lineint[2];
 | |
|             (fsm+i)->final_state = last_final;
 | |
|             break;
 | |
|         case 4:
 | |
|             (fsm+i)->state_no = lineint[0];
 | |
|             (fsm+i)->in = lineint[1];
 | |
|             (fsm+i)->out = lineint[1];
 | |
|             (fsm+i)->target = lineint[2];
 | |
|             (fsm+i)->final_state = lineint[3];
 | |
|             laststate = lineint[0];
 | |
|             last_final = lineint[3];
 | |
|             break;
 | |
|         case 5:
 | |
|             (fsm+i)->state_no = lineint[0];
 | |
|             (fsm+i)->in = lineint[1];
 | |
|             (fsm+i)->out = lineint[2];
 | |
|             (fsm+i)->target = lineint[3];
 | |
|             (fsm+i)->final_state = lineint[4];
 | |
|             laststate = lineint[0];
 | |
|             last_final = lineint[4];
 | |
|             break;
 | |
|         default:
 | |
|             printf("File format error\n");
 | |
|             return NULL;
 | |
|         }
 | |
|         if (laststate > 0) {
 | |
|             (fsm+i)->start_state = 0;
 | |
|         } else if (laststate == -1) {
 | |
|             (fsm+i)->start_state = -1;
 | |
|         } else {
 | |
|             (fsm+i)->start_state = 1;
 | |
|         }
 | |
| 
 | |
|     }
 | |
|     if (strcmp(buf, "##cmatrix##") == 0) {
 | |
|         cmatrix_init(net);
 | |
|         cm = net->medlookup->confusion_matrix;
 | |
|         for (;;) {
 | |
|             io_gets(iobh, buf);
 | |
|             if (buf[0] == '#') break;
 | |
|             sscanf(buf,"%i", &i);
 | |
|             *cm = i;
 | |
|             cm++;
 | |
|         }
 | |
|     }
 | |
|     if (strcmp(buf, "##end##") != 0) {
 | |
|         printf("File format error!\n");
 | |
|         return NULL;
 | |
|     }
 | |
|     return(net);
 | |
| }
 | |
| 
 | |
| static int io_gets(struct io_buf_handle *iobh, char *target) {
 | |
|     int i;
 | |
|     for (i = 0; *((iobh->io_buf_ptr)+i) != '\n' && *((iobh->io_buf_ptr)+i) != '\0'; i++) {
 | |
|         *(target+i) = *((iobh->io_buf_ptr)+i);
 | |
|     }
 | |
|     *(target+i) = '\0';
 | |
|     if (*((iobh->io_buf_ptr)+i) == '\0')
 | |
|     (iobh->io_buf_ptr) = (iobh->io_buf_ptr) + i;
 | |
|     else
 | |
|         (iobh->io_buf_ptr) = (iobh->io_buf_ptr) + i + 1;
 | |
| 
 | |
|     return(i);
 | |
| }
 | |
| 
 | |
| size_t io_gz_file_to_mem(struct io_buf_handle *iobh, char *filename) {
 | |
| 
 | |
|     size_t size;
 | |
|     gzFile FILE;
 | |
| 
 | |
|     size = io_get_file_size(filename);
 | |
|     if (size == 0) {
 | |
|         return 0;
 | |
|     }
 | |
|     (iobh->io_buf) = xxmalloc((size+1)*sizeof(char));
 | |
|     FILE = gzopen(filename, "rb");
 | |
|     gzread(FILE, iobh->io_buf, size);
 | |
|     gzclose(FILE);
 | |
|     *((iobh->io_buf)+size) = '\0';
 | |
|     iobh->io_buf_ptr = iobh->io_buf;
 | |
|     return(size);
 | |
| }
 | |
| 
 | |
| struct io_buf_handle *io_init() {
 | |
|     struct io_buf_handle *iobh;
 | |
|     iobh = xxmalloc(sizeof(struct io_buf_handle));
 | |
|     (iobh->io_buf) = NULL;
 | |
|     (iobh->io_buf_ptr) = NULL;
 | |
|     return(iobh);
 | |
| }
 | |
| 
 | |
| void io_free(struct io_buf_handle *iobh) {
 | |
|     if (iobh->io_buf != NULL) {
 | |
|         xxfree(iobh->io_buf);
 | |
|         (iobh->io_buf) = NULL;
 | |
|     }
 | |
|     xxfree(iobh);
 | |
| }
 | 
