/* cut.c - Cut from a file. * * Copyright 2012 Ranjan Kumar , Kyungwan Han * * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html * USE_CUT(NEWTOY(cut, "b:c:f:d:sn", TOYFLAG_BIN)) config CUT bool "cut" default y help usage: cut OPTION... [FILE]... Print selected parts of lines from each FILE to standard output. -b LIST select only these bytes from LIST. -c LIST select only these characters from LIST. -f LIST select only these fields. -d DELIM use DELIM instead of TAB for field delimiter. -s do not print lines not containing delimiters. -n don't split multibyte characters (Ignored). */ #define FOR_cut #include "toys.h" typedef struct _slist { int start_position; int end_position; struct _slist *next; }SLIST; GLOBALS( char *delim; char *flist; char *clist; char *blist; struct _slist *slist_head; unsigned nelem; ) #define BEGINOFLINE 0 #define ENDOFLINE INT_MAX #define NORANGE -1 static int get_user_options(void); void (*do_cut)(int); static void do_fcut(int fd); static void do_bccut(int fd); static void free_list(void); /* * add items in the slist. */ static void add_to_list(int start, int end) { SLIST *current, *head_ref, *temp1_node; head_ref = TT.slist_head; temp1_node = (SLIST *)xzalloc(sizeof(SLIST)); temp1_node->start_position = start; temp1_node->end_position = end; temp1_node->next = NULL; /* Special case for the head end */ if (head_ref == NULL || (head_ref)->start_position >= start) { temp1_node->next = head_ref; head_ref = temp1_node; } else { /* Locate the node before the point of insertion */ current = head_ref; while (current->next!=NULL && current->next->start_position < temp1_node->start_position) current = current->next; temp1_node->next = current->next; current->next = temp1_node; } TT.slist_head = head_ref; return; } /* * parse list and add to slist. */ static void parse_list(char *list) { char *ctoken, *dtoken; int start = 0, end = 0; while((ctoken = strsep(&list, ",")) != NULL) { if(!ctoken[0]) continue; //Get start position. dtoken = strsep(&ctoken, "-"); if(!dtoken[0]) start = BEGINOFLINE; else { start = get_int_value(dtoken, 0, INT_MAX); start = (start?(start-1):start); } //Get end position. if(ctoken == NULL) end = NORANGE; //case e.g. 1,2,3 else if(!ctoken[0]) end = ENDOFLINE; //case e.g. N- else {//case e.g. N-M end = get_int_value(ctoken, 0, INT_MAX); end = (end?end:ENDOFLINE); end--; if(end == start) end = NORANGE; } add_to_list(start, end); TT.nelem++; } //if list is missing in command line. if(TT.nelem == 0) error_exit("missing positions list:"); return; } /* * retrive data from the file/s. */ static void get_data(void) { char **argv = toys.optargs; //file name. toys.exitval = EXIT_SUCCESS; if(!*argv) do_cut(0); //for stdin else { for(; *argv; ++argv) { if(strcmp(*argv, "-") == 0) do_cut(0); //for stdin else { int fd = open(*argv, O_RDONLY, 0); if(fd < 0) {//if file not present then continue with other files. perror_msg(*argv); toys.exitval = EXIT_FAILURE; continue; } do_cut(fd); xclose(fd); } } } return; } /* * cut main function. */ void cut_main(void) { char delimiter = '\t'; //default delimiter. int num_of_options = 0; char *list; TT.nelem = 0; TT.slist_head = NULL; //verify the number of options provided by user. num_of_options = get_user_options(); if(num_of_options == 0) error_exit("specify a list of bytes, characters, or fields"); else if(num_of_options > 1) error_exit("only one type of list may be specified"); //Get list and assign the function. if(toys.optflags & FLAG_f) { list = TT.flist; do_cut = do_fcut; } else if(toys.optflags & FLAG_c) { list = TT.clist; do_cut = do_bccut; } else { list = TT.blist; do_cut = do_bccut; } if(toys.optflags & FLAG_d) { //delimiter must be 1 char. if(TT.delim[0] && TT.delim[1]) perror_exit("the delimiter must be a single character"); delimiter = TT.delim[0]; } if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) { TT.delim = (char *)xzalloc(2); TT.delim[0] = delimiter; } //when field is not specified, cutting has some special handling. if(!(toys.optflags & FLAG_f)) { if(toys.optflags & FLAG_s) perror_exit("suppressing non-delimited lines operating on fields"); if(delimiter != '\t') perror_exit("an input delimiter may be specified only when operating on fields"); } parse_list(list); get_data(); if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) { free(TT.delim); TT.delim = NULL; } free_list(); return; } /* * perform cut operation on the given delimiter. */ static void do_fcut(int fd) { char *buff; char *delimiter = TT.delim; while((buff = get_line(fd)) != NULL) { //does line have any delimiter?. if(strrchr(buff, (int)delimiter[0]) == NULL) { //if not then print whole line and move to next line. if(!(toys.optflags & FLAG_s)) xputs(buff); continue; } unsigned cpos = 0; int start, ndelimiters = -1; int nprinted_fields = 0; char *pfield = xzalloc(strlen(buff) + 1); SLIST *temp_node = TT.slist_head; if(temp_node != NULL) { //process list on each line. while(cpos < TT.nelem && buff) { if(!temp_node) break; start = temp_node->start_position; do { char *field = NULL; //count number of delimeters per line. while(buff) { if(ndelimiters < start) { ndelimiters++; field = strsep(&buff, delimiter); } else break; } //print field (if not yet printed). if(!pfield[ndelimiters]) { if(ndelimiters == start) { //put delimiter. if(nprinted_fields++ > 0) xputc(delimiter[0]); if(field) fputs(field, stdout); //make sure this field won't print again. pfield[ndelimiters] = (char) 0x23; //put some char at this position. } } start++; if((temp_node->end_position == NORANGE) || (!buff)) break; }while(start <= temp_node->end_position); temp_node = temp_node->next; cpos++; } } xputc('\n'); free(pfield); pfield = NULL; }//End of while loop. return; } /* * perform cut operation char or byte. */ static void do_bccut(int fd) { char *buff; while((buff = get_line(fd)) != NULL) { unsigned cpos = 0; int buffln = strlen(buff); char *pfield = xzalloc(buffln + 1); SLIST *temp_node = TT.slist_head; if(temp_node != NULL) { while(cpos < TT.nelem) { int start; if(!temp_node) break; start = temp_node->start_position; while(start < buffln) { //to avoid duplicate field printing. if(pfield[start]) { if(++start <= temp_node->end_position) continue; temp_node = temp_node->next; break; } else { //make sure this field won't print again. pfield[start] = (char) 0x23; //put some char at this position. xputc(buff[start]); } if(++start > temp_node->end_position) { temp_node = temp_node->next; break; } } cpos++; } xputc('\n'); } free(pfield); pfield = NULL; } return; } /* * used to verify "c", "b" and "f" options are present in command line. * As cut command can only except any of them at a time. */ static int get_user_options(void) { int i = 3, flag = 0; for(; i<6; i++) {//verify 3rd, 4th and 5th bit is set. int mask = 1; mask = mask << i; if(toys.optflags & mask) flag++; } return flag; } /* * free the slist. */ static void free_list(void) { SLIST *temp; while(TT.slist_head != NULL) { temp = TT.slist_head->next; free(TT.slist_head); TT.slist_head = temp; } return; }