01-13-2015 02:52 AM
Hi,
I am glad to see that you have resolved all your issues.
Concerning the column-number limitation, maybe you could do the test in the while condition:
while(*carattere && !error && (stringa_in_corso<numero_stringhe)) ==> parsing will stop as soon as the maximum number of columns is reached
Regards,
Stef
01-13-2015 02:57 AM
I've done it this way because I had already tried that with the old version and ran into this problem:
if a special (quoted) column contains CR LF and I don't parse it because I've already reached the maximum number of readable columns, then when I parse the next line I get an error, because I start reading in the middle of an "inquote" value.
01-15-2015 02:28 AM
I've made some corrections to correctly handle an inquote column with CR LF inside it. This seems to be the final version of the reader.
The old version is left commented out in the code.
I have also written a column counter; it may be useful for allocating the destination strings.
/*
 * Parse one CSV record, starting at lines[riga_partenza], into the
 * caller-provided destination strings.
 *
 * lines                 array of line strings; a quoted field that contains a
 *                       line break continues on the following entry. A NULL
 *                       entry is treated as "no more lines".
 *                       NOTE(review): the function has no line count, so the
 *                       caller must guarantee that the entry following an
 *                       unterminated quoted field is readable (either a valid
 *                       string or a NULL terminator).
 * riga_partenza         index of the first line of the record.
 * stringa_destinazione  numero_stringhe writable buffers, one per column.
 *                       NOTE(review): buffer sizes are not passed in, so the
 *                       caller must size them for the longest possible field.
 * numero_stringhe       number of destination buffers; columns beyond this
 *                       count are still parsed (so the whole record is
 *                       consumed) but their content is discarded.
 * formato               1 = ';' separated, and every ',' is stored as '.';
 *                       any other value = ',' separated.
 *
 * Returns the index of the last line consumed by the record.
 *
 * Quoting rules: a field is quoted when its first character is '"'. Inside a
 * quoted field the delimiter and line breaks are data, and "" is an escaped
 * quote. A line break inside a quoted field is stored as "\r\n". Parsing stops
 * at the end of the record or at the first malformed quote; in both cases the
 * field being written is NUL-terminated.
 */
int leggi_riga_csv_v2(char **lines, int riga_partenza, char *stringa_destinazione[], int numero_stringhe, int formato)
{
    const char delimitatore = (formato == 1) ? ';' : ',';
    int stringa_in_corso = 0;
    int index_stringa_in_corso = 0;
    int inquote = 0;
    int finito = 0;
    int i;
    const char *carattere;

    for (i = 0; i < numero_stringhe; i++) {
        stringa_destinazione[i][0] = '\0';
    }

    if (lines == NULL || lines[riga_partenza] == NULL) {
        return riga_partenza;
    }

    // Point to the beginning of the current line
    carattere = lines[riga_partenza];

    while (!finito) {
        const char c = *carattere;
        const int fine_riga = (c == '\0' || c == '\r' || c == '\n');
        const int da_scrivere = (stringa_in_corso < numero_stringhe);
        // In format 1 the comma is a decimal separator and is stored as '.'
        const char da_copiare = (c == ',' && formato == 1) ? '.' : c;

        if (inquote) {
            if (c == '"') {
                const char successivo = carattere[1];

                if (successivo == '"') {
                    // Escaped quote: store a single '"'
                    if (da_scrivere) {
                        stringa_destinazione[stringa_in_corso][index_stringa_in_corso] = '"';
                    }
                    index_stringa_in_corso++;
                    carattere += 2;
                } else if (successivo == delimitatore) {
                    // Closing quote followed by a delimiter: start the next field
                    if (da_scrivere) {
                        stringa_destinazione[stringa_in_corso][index_stringa_in_corso] = '\0';
                    }
                    stringa_in_corso++;
                    index_stringa_in_corso = 0;
                    inquote = 0; // the next field starts unquoted
                    carattere += 2;
                } else {
                    // Closing quote at end of line ends the record; a closing
                    // quote followed by anything else is malformed. Stop either way.
                    finito = 1;
                }
            } else if (fine_riga) {
                // Line break inside a quoted field: the field continues on the next line
                char *prossima = lines[riga_partenza + 1];

                if (prossima == NULL) {
                    finito = 1; // unterminated quote on the last line
                } else {
                    if (da_scrivere) {
                        stringa_destinazione[stringa_in_corso][index_stringa_in_corso] = '\r';
                        stringa_destinazione[stringa_in_corso][index_stringa_in_corso + 1] = '\n';
                    }
                    index_stringa_in_corso += 2;
                    riga_partenza++;
                    carattere = prossima;
                }
            } else {
                // Ordinary character, including a delimiter, inside quotes
                if (da_scrivere) {
                    stringa_destinazione[stringa_in_corso][index_stringa_in_corso] = da_copiare;
                }
                index_stringa_in_corso++;
                carattere++;
            }
        } else if (fine_riga) {
            finito = 1; // end of record
        } else if (c == delimitatore) {
            if (da_scrivere) {
                stringa_destinazione[stringa_in_corso][index_stringa_in_corso] = '\0';
            }
            stringa_in_corso++;
            index_stringa_in_corso = 0;
            carattere++;
        } else if (c == '"') {
            if (index_stringa_in_corso == 0) {
                // A quote as the first character opens a quoted field
                inquote = 1;
                carattere++;
            } else {
                finito = 1; // quote inside an unquoted field: malformed
            }
        } else {
            if (da_scrivere) {
                stringa_destinazione[stringa_in_corso][index_stringa_in_corso] = da_copiare;
            }
            index_stringa_in_corso++;
            carattere++;
        }
    }

    // Always leave the field being written NUL-terminated, even on malformed input
    if (stringa_in_corso < numero_stringhe) {
        stringa_destinazione[stringa_in_corso][index_stringa_in_corso] = '\0';
    }

    return riga_partenza;
}
/*
 * Count the columns of the first CSV record in lines, i.e. the record that
 * starts at lines[0].
 *
 * lines    array of line strings; a quoted field that contains a line break
 *          continues on the following entry. A NULL entry is treated as
 *          "no more lines".
 *          NOTE(review): the function has no line count, so the caller must
 *          guarantee that the entry following an unterminated quoted field is
 *          readable (either a valid string or a NULL terminator).
 * formato  1 = ';' separated; any other value = ',' separated.
 *
 * Returns the number of columns in the record. An empty line counts as one
 * (empty) column. Returns 0 when lines or lines[0] is NULL. If a malformed
 * quote is found (a quote inside an unquoted field, or a closing quote not
 * followed by a delimiter or end of line), counting stops and only the columns
 * completed before that point are returned.
 *
 * Quoting rules: a field is quoted when its first character is '"'. Inside a
 * quoted field the delimiter and line breaks are data, and "" is an escaped
 * quote.
 */
int contacolonne_csv(char **lines, int formato)
{
    const char delimitatore = (formato == 1) ? ';' : ',';
    int numero_colonne = 0;
    int inizio_campo = 1; // true while no character of the current field has been seen
    int riga = 0;
    int inquote = 0;
    int finito = 0;
    const char *carattere;

    if (lines == NULL || lines[0] == NULL) {
        return 0;
    }

    // Point to the beginning of the first line
    carattere = lines[0];

    while (!finito) {
        const char c = *carattere;
        const int fine_riga = (c == '\0' || c == '\r' || c == '\n');

        if (inquote) {
            if (c == '"') {
                const char successivo = carattere[1];

                if (successivo == '"') {
                    // Escaped quote: still inside the same field
                    carattere += 2;
                } else if (successivo == delimitatore) {
                    // Closing quote followed by a delimiter: one more column done
                    numero_colonne++;
                    inizio_campo = 1;
                    inquote = 0; // the next field starts unquoted
                    carattere += 2;
                } else {
                    // A closing quote at end of line completes the last column;
                    // followed by anything else it is malformed and not counted.
                    if (successivo == '\0' || successivo == '\r' || successivo == '\n') {
                        numero_colonne++;
                    }
                    finito = 1;
                }
            } else if (fine_riga) {
                // Line break inside a quoted field: the field continues on the next line
                if (lines[riga + 1] == NULL) {
                    numero_colonne++; // unterminated quote on the last line still holds a field
                    finito = 1;
                } else {
                    carattere = lines[++riga];
                }
            } else {
                // Ordinary character, including a delimiter, inside quotes
                carattere++;
            }
        } else if (fine_riga) {
            numero_colonne++; // the last column ends with the record
            finito = 1;
        } else if (c == delimitatore) {
            numero_colonne++;
            inizio_campo = 1;
            carattere++;
        } else if (c == '"') {
            if (inizio_campo) {
                // A quote as the first character opens a quoted field
                inquote = 1;
                inizio_campo = 0;
                carattere++;
            } else {
                finito = 1; // quote inside an unquoted field: malformed
            }
        } else {
            inizio_campo = 0;
            carattere++;
        }
    }

    return numero_colonne;
}
01-15-2015 03:19 AM
Hi holly7787,
The best approach could be to allocate the destination strings while parsing a line.
Each time you encounter a new separator you allocate a new string ... then your function can return the allocated strings and the number of columns found.
The next part of your code will process the strings and then free them.
This avoids walking through the whole file twice (once to count the columns and once to parse the lines).
One last optimization: line splitting is not required either; you can just put the whole content of your file in a buffer and pass the buffer pointer and the offset of the last character read to your function.
Something like this :
int leggi_riga_csv_v3(char *buffer, int *offset ...
*carattere will become buffer[*offset]
carattere++ will become (*offset)++
Best regards,
Stef