-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharff_utils.c
137 lines (119 loc) · 2.92 KB
/
arff_utils.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#define MAXLINE 8192
void process_quoted_attribute(char *line);
void process_unquoted_attribute(char *line);
/****************************************
* http://weka.wikispaces.com/ARFF+%28book+version%29
*
*
*/
/* The file is read one line at a time with fgets, from stdio.h.
* (For formatted data input with fscanf, see page 162 of
* Advanced Programming in the UNIX Environment, 3rd.)
* Any line that begins with % is a comment.
* The @relation, @attribute, and @data are case insensitive.
*
* ARFF Header Section * Example Attribute Line:
* @attribute NumberOfAdults {0,1,2,'3 or more'}
*
* ARFF Data Section
*
*/
/*
 * Return nonzero when `line` begins with `keyword`, ignoring ASCII case.
 * `keyword` must be spelled in lower case. A line shorter than the keyword
 * never matches because its terminating NUL differs from the keyword byte.
 */
static int starts_with_keyword(const char *line, const char *keyword)
{
    for (; *keyword != '\0'; line++, keyword++)
    {
        /* Cast first: tolower() is only defined for unsigned char values. */
        if (tolower((unsigned char)*line) != *keyword)
        {
            return 0;
        }
    }
    return 1;
}

/*
 * Scan the ARFF file line by line.
 *
 * Every @attribute line (matched case-insensitively) is handed to
 * process_quoted_attribute() when the attribute name starts with a single
 * quote, otherwise to process_unquoted_attribute(). Remaining lines longer
 * than 20 characters are counted as data lines, except '%' comments and
 * other '@' directives. The count is printed at the end.
 *
 * Returns 0 on success, 1 when the input file cannot be opened.
 */
int main(void)
{
    static const char attribute_keyword[] = "@attribute";
    const size_t keyword_len = sizeof(attribute_keyword) - 1;
    char line[MAXLINE] = {0};
    int count = 0;

    FILE *fp = fopen("/home/krw/data/training_subsetD.arff", "r");
    if (fp == NULL)
    {
        /* Report on stderr together with the errno reason. */
        perror("error opening file");
        return 1;
    }

    /* While there are lines in the file. */
    while (fgets(line, sizeof(line), fp) != NULL)
    {
        if (starts_with_keyword(line, attribute_keyword))
        {
            /* Skip any run of blanks between the keyword and the name
             * instead of assuming the name starts at a fixed offset. */
            const char *name = line + keyword_len;
            while (*name == ' ' || *name == '\t')
            {
                name++;
            }

            if (*name == '\'')
            {
                printf("QUOTED!\n");
                process_quoted_attribute(line);
            }
            else
            {
                printf("UNQUOTED!\n");
                process_unquoted_attribute(line);
            }
        }
        /* Heuristic: a data line has more than 20 chars. Comments ('%') and
         * header directives ('@') are never data, whatever their length. */
        else if (line[0] != '%' && line[0] != '@' && strlen(line) > 20)
        {
            count++;
        }
    }

    printf("data lines: %d\n", count);
    fclose(fp);
    return 0;
}
/*
 * Split the brace-delimited value list of an @attribute line and print it.
 *
 * Example input:  "@attribute NumberOfAdults {0,1,2,'3 or more'}\n"
 * Printed values: 0, 1, 2, '3 or more'
 *
 * line: NUL-terminated attribute line. It is tokenized IN PLACE: the closing
 *       '}' and every ',' inside the list are overwritten with '\0'. A line
 *       without '{' (e.g. a numeric attribute) is printed and left untouched.
 *
 * Limitations: values are split on every ',' so a quoted value containing a
 * comma or '}' is not handled; empty fields are skipped; at most MAX_TOKENS
 * values are kept and any further ones are ignored.
 */
void process_unquoted_attribute(char *line)
{
    enum { MAX_TOKENS = 100 };
    char *tokens[MAX_TOKENS] = { NULL };
    size_t count = 0;

    if (line == NULL)
    {
        return;
    }

    printf("LINE: %s\n\n", line);

    char *list = strchr(line, '{');
    if (list == NULL)
    {
        /* No nominal value list on this line: nothing to tokenize. */
        return;
    }

    printf("STR1: %s\n\n", list);

    /* Terminate the list at the closing brace. If the brace is missing,
     * fall back to cutting at the end-of-line characters so no token
     * carries a trailing "\r\n". */
    char *cursor = list + 1; /* step past the '{' */
    char *close = strchr(cursor, '}');
    if (close != NULL)
    {
        *close = '\0';
    }
    else
    {
        cursor[strcspn(cursor, "\r\n")] = '\0';
    }

    /* Cut the list at each comma; the tokens point into `line` itself. */
    while (cursor != NULL && count < MAX_TOKENS)
    {
        char *comma = strchr(cursor, ',');
        if (comma != NULL)
        {
            *comma = '\0';
        }
        if (*cursor != '\0')
        {
            tokens[count++] = cursor;
        }
        cursor = (comma != NULL) ? comma + 1 : NULL;
    }

    printf("\n\nhere they are\n\n");
    for (size_t i = 0; i < count; i++)
    {
        printf("tokens[%zu] = %s\n", i, tokens[i]);
    }
    printf("\n\n\n");
}
/*
 * Handle an @attribute line whose name is single-quoted.
 * Currently this only echoes the line, unchanged, to standard output.
 *
 * line: NUL-terminated attribute line; it is not modified.
 */
void process_quoted_attribute(char *line)
{
    fputs(line, stdout);
}