Reputation: 3618
I need to parse an ics file in C and will be doing it line by line. Each line can vary a lot in its format, but generally follows a standard.
Here are some rules I have noticed:
Here is an example ics component that would need to be parsed out:
UID:uid1@example.com
DTSTAMP:19970714T170000Z
ORGANIZER;CN=John Doe:MAILTO:john.doe@example.com
CATEGORIES:Project Report, XYZ, Weekly Meeting
DTSTART:19970714T170000Z
DTEND:19970715T035959Z
SUMMARY:Bastille Day Party
You'll notice that in things like the `MAILTO` value there is a following `:`. Only the first colon would be parsed as the separator, and everything after that colon is the property value.
Using something like strtok() seems too basic to be adequate for this problem.
Should something like a regular expression be used to solve this problem? Looking into it, I see an example of a regex solution written in C# in this Stack Overflow answer.
Upvotes: 1
Views: 1353
Reputation: 586
// disclaimer : no support
// code provided as an example of the minimal things one can do.
#include <malloc.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>
/* One node in the singly linked list of values attached to a parsed line
 * (the ','-separated pieces of text that follow the first ':'). */
struct value {
/* next value in the list, or NULL at the end of the list */
struct value *next;
/* heap-allocated, NUL-terminated copy of the value text */
char *val;
};
/* One node in the singly linked list of ';'-introduced items that sit
 * between a line's name and its ':' separator. */
struct property {
/* next property in the list, or NULL at the end of the list */
struct property *next;
/* heap-allocated, NUL-terminated copy of the raw item text (e.g. the whole
 * "CN=John Doe" span; it is not split at '=') */
char *prop;
};
/* One parsed input line: its leading name plus the lists of properties and
 * values that parse() collected for it. */
struct parameter {
/* head of the property list, or NULL when the line has none */
struct property *props;
/* head of the value list, or NULL when the line has none */
struct value *vals;
/* next parsed line in the list returned by parse(), or NULL */
struct parameter *next;
/* heap-allocated, NUL-terminated copy of the text before the first ';' or ':' */
char *name;
};
/* Which part of a line parse() is currently scanning: the leading name
 * (PARAMETER), a ';'-introduced item (PROPERTY), or the value list that
 * follows ':' (VALUE). */
enum PARSE_STATE { PARAMETER, PROPERTY, VALUE };
//format for lines is...
// PARAMETER[;PARAM_PROPERTY..]:VALUE[,VALUE2..]\n
struct parameter *parse( char *input )
{
size_t start, end;
char *buf;
enum PARSE_STATE state;
struct parameter *root = NULL;
struct parameter *new_parameter;
struct property *new_property;
struct value *new_value;
char in_quote = 0;
start = end = 0;
state = PARAMETER;
while( input[end] )
{
switch( state )
{
case PARAMETER :
if( input[end] == ';' || input[end] == ':' ) {
new_parameter = malloc( sizeof( struct parameter ) );
new_parameter->next = root;
new_parameter->name = malloc( end - start + 1 );
strncpy( new_parameter->name, input + start, end - start );
new_parameter->name[end-start] = 0;
new_parameter->props = new_parameter->vals = NULL;
root = new_parameter;
start = end + 1;
if( input[end] == ';' )
state = PROPERTY;
else
state = VALUE;
}
break;
case PROPERTY :
if( input[end] == '"' ) {
if( !in_quote )
in_quote = input[start];
else if( input[start] == in_quote )
in_quote = 0;
break;
}
if( in_quote ) break;
if( input[end] == ';' || input[end] == ':' ) {
new_property = malloc( sizeof( struct property ) );
new_property->prop = malloc( end - start + 1 );
strncpy( new_property->prop, input + start, end - start );
new_property->prop[end-start] = 0;
new_property->next = root->props;
root->props = new_property;
if( input[end] == ':' )
state = VALUE;
start = end + 1;
break;
}
break;
case VALUE :
if( input[end] == '\n' || input[end] == ',' ) {
new_value = malloc( sizeof( struct value ) );
new_value->val = malloc( end - start + 1 );
strncpy( new_value->val, input + start, end - start );
new_value->val[end-start] = 0;
new_value->next = root->vals;
root->vals = new_value;
if( input[end] == '\n' )
state = PARAMETER;
start = end + 1;
}
break;
}
end++;
}
if( end != start )
fprintf( stderr, "missing newline at end of input\n" );
return root;
}
/*
 * Write the parsed list to stdout, one line of output per struct parameter:
 * the name, then each property prefixed with "; ", then the values, the
 * first prefixed with " : " and every later one with ", ".
 * Entries are printed in list order.
 */
void DumpResult( struct parameter *root )
{
    struct parameter *line = root;

    while( line != NULL ) {
        struct property *p = line->props;
        struct value *v = line->vals;

        printf( "%s ", line->name );

        while( p != NULL ) {
            printf( "; %s ", p->prop );
            p = p->next;
        }

        /* the head of the value list gets the ':' lead-in, the rest ',' */
        if( v != NULL ) {
            printf( " : %s ", v->val );
            for( v = v->next; v != NULL; v = v->next )
                printf( ", %s ", v->val );
        }

        printf( "\n" );
        line = line->next;
    }
}
And here is how to use the above code. Note that the values all come out reversed, because parse() pushes each new node onto the front of its list.
/* Demo driver: parse a hard-coded sample event and print the result.
 * Returns int, as the C standard requires for main. */
int main( void )
{
    char *string = "UID:[email protected]\n"
                   "DTSTAMP:19970714T170000Z\n"
                   "ORGANIZER;CN=John Doe;SENT-BY=\"mailto:[email protected]\":mailto:[email protected]\n"
                   "CATEGORIES:Project Report, XYZ, Weekly Meeting\n"
                   "DTSTART:19970714T170000Z\n"
                   "DTEND:19970715T035959Z\n"
                   "SUMMARY:Bastille Day Party\n";
    struct parameter *thing = parse( string );

    DumpResult( thing );
    /* the parsed list is not freed; the program ends right here */
    return 0;
}
Upvotes: 0
Reputation: 53006
You can do it with this
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Read "example.ics" line by line and print each line as "key --> value",
 * splitting at the first ':' or ';'.  Lines that contain neither separator
 * are skipped.
 *
 * Returns 0 on success, EXIT_FAILURE if the file cannot be opened.
 */
int
main(void)
{
    FILE *ics;
    /* NOTE(review): a physical line longer than 99 bytes is returned by
     * fgets() in several pieces, and each piece is then treated as a line
     * of its own -- enlarge the buffer if such input is expected. */
    char  line[100];

    ics = fopen("example.ics", "r");
    if (ics == NULL)
    {
        perror("example.ics");
        return EXIT_FAILURE;
    }
    while (fgets(line, sizeof(line), ics) != NULL)
    {
        char *separator;
        char *key;
        char *value;

        /* Cut the line at the first '\r' or '\n' so that both LF and CRLF
         * line endings are removed from the value. */
        line[strcspn(line, "\r\n")] = '\0';
        separator = strpbrk(line, ":;");
        if (separator == NULL)
            continue;
        *separator = '\0';

        key   = line; // Maybe you want to strip surrounding white spaces
        value = separator + 1; // Maybe you want to strip surrounding white spaces

        fprintf(stdout, "%s --> %s\n", key, value);
    }
    fclose(ics);
    return 0;
}
Using a regular expression for this is like killing a fly with a bazooka.
Upvotes: 2