forked from cme3202/lexical_analyzer_cengpp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pl.c
210 lines (187 loc) · 6.34 KB
/
pl.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAXCHAR 1000 // size of the line buffer that main() hands to fgets()
/*
 * Forward declarations for the lexer passes. Each pass receives one line of
 * source text together with the line number maintained by main().
 * Several of them are declared here but not implemented yet.
 */
int parse_line(char *str, int line_count); // planned: read line, remove case sensitivity, find comments, detect structure (currently only returns 0)
int find_identifiers(char *str, int line_count); // reports keywords, brackets, the assignment identifier, operators and EndOfLine for one line
int find_constants(char *str, int line_count); // planned: find integer and string constants, validate them and create tokens (not implemented yet)
int find_operators(char *str, int line_count); // planned: find operators, validate them and create tokens (not implemented yet)
int find_brackets(char *str, int line_count); // prints a token name for each kind of bracket present in the line
int find_keywords(char *str, int line_count); // planned: find keywords, validate them and create tokens (not implemented yet)
int write_token_on_lex(char *str, int line_count); // planned: write a token to the generated lex file (not implemented yet)
int show_error(char *type, int line_count); // planned: show an error with its type and line number (not implemented yet)
char *strremove(char *str, const char *sub); // deletes every occurrence of sub from str in place and returns str
char *identifiers[1000]; // NOTE(review): not referenced by the code visible here; presumably meant to collect identifier names
int identifier_count = 0; // NOTE(review): not referenced by the code visible here; presumably the number of used slots in identifiers[]
// TODO: this code still needs further cleanup.
/*
 * Program entry point.
 *
 * Opens the fixed input file "code.ceng", feeds it line by line (at most
 * MAXCHAR - 1 characters per read) to parse_line() and find_identifiers(),
 * then prints a final newline.
 *
 * Returns 1 when the input file cannot be opened, 0 otherwise.
 */
int main()
{
    const char *source_path = "code.ceng";
    char line_buf[MAXCHAR];
    FILE *source = fopen(source_path, "r");

    if (!source) {
        printf("Could not open file %s", source_path);
        return 1;
    }

    /* Line numbers handed to the passes start at 1. */
    for (int line_no = 1; fgets(line_buf, sizeof line_buf, source) != NULL; line_no++) {
        parse_line(line_buf, line_no);
        find_identifiers(line_buf, line_no);
    }

    printf("\n");
    fclose(source);
    return 0;
}
/*
 * Per-line preprocessing pass -- placeholder, nothing is implemented yet.
 *
 * Language rules this pass is meant to cover:
 *   - ';' ends a line and yields the token EndOfLine.
 *   - Anything between (* and *) is a comment. Comments behave like blank
 *     space and produce no tokens.
 *   - A comment that is still open at end of file is a lexical error.
 *
 * Planned steps:
 *   - remove case sensitivity
 *   - collapse runs of more than one whitespace character
 *   - split the line with a switch/case
 *
 * Parameters:
 *   str        - the line to process (currently unused)
 *   line_count - number of that line (currently unused)
 *
 * Always returns 0.
 */
int parse_line(char *str, int line_count)
{
    (void)str;
    (void)line_count;
    return 0;
}
/*
 * Scan one line of Ceng++ source and print the tokens found to stdout.
 *
 * Language rules for identifiers:
 *   - The maximum identifier size is 20 characters; a longer one is an error.
 *   - Ceng++ is not case sensitive.
 *   - Identifiers start with a letter and consist of letters, digits or '_'.
 *   - Example token: Identifier(my_var_1)
 *
 * Steps, in order:
 *   1. Remove the comment delimiters "(*" and "*)" (the text between them is
 *      left in place).
 *   2. Print Keyword(<kw>) for each keyword that occurs in the line and delete
 *      all of its occurrences.
 *   3. Let find_brackets() report the brackets.
 *   4. If the line contains ":=", take the text in front of it as the
 *      identifier, strip surrounding whitespace, validate it and print
 *      Identifier(<name>) followed by Operator(:=).
 *   5. Print Operator(<op>) for each operator still visible in the buffer.
 *   6. Print EndOfLine if the line contained ';'.
 *
 * Parameters:
 *   str        - NUL-terminated, writable line buffer; modified in place.
 *   line_count - line number, forwarded to find_brackets().
 *
 * Returns 0. If the identifier is longer than 20 characters or does not begin
 * with a letter, a message is printed and the process exits with EXIT_FAILURE.
 *
 * Known limitations (kept as they were):
 *   - Keywords and operators are matched as plain substrings, so "int" inside
 *     "print" is reported as a keyword and "++" is also reported as "+".
 *   - strtok() splits on ':' or '=' individually and cuts the buffer at the
 *     first of them after the identifier, so step 5 never sees the right-hand
 *     side of an assignment.
 *   - Only the first character of the identifier is validated.
 */
int find_identifiers(char *str, int line_count)
{
    static const char *const operators[] = {"+", "-", "*", "/", "++", "--", ":="};
    static const char *const keywords[] = {
        "break", "case", "char", "const", "continue", "do",
        "else", "enum", "float", "for", "goto", "if",
        "int", "long", "record", "return", "static", "while"};
    const size_t operator_count = sizeof operators / sizeof operators[0];
    const size_t keyword_count = sizeof keywords / sizeof keywords[0];
    const size_t max_identifier_len = 20;

    str = strremove(str, "(*");
    str = strremove(str, "*)");

    for (size_t i = 0; i < keyword_count; i++) {
        if (strstr(str, keywords[i]) != NULL) {
            printf("Keyword(%s) \n", keywords[i]);
            str = strremove(str, keywords[i]);
        }
    }

    find_brackets(str, line_count);

    /* Must be recorded now: strtok() below truncates the buffer. */
    const int endofline = strchr(str, ';') != NULL;

    if (strstr(str, ":=") != NULL) {
        /* NULL when the buffer holds nothing but ':' and '=' characters. */
        char *identifier = strtok(str, ":=");
        size_t length = 0;

        if (identifier != NULL) {
            /*
             * Strip surrounding whitespace. Without this, indentation or the
             * gap left by a removed keyword ("int x := 5;") makes the first
             * character a blank, and padding counts toward the length limit.
             */
            while (isspace((unsigned char)*identifier))
                identifier++;
            length = strlen(identifier);
            while (length > 0 && isspace((unsigned char)identifier[length - 1]))
                identifier[--length] = '\0';
        }

        if (length > max_identifier_len) {
            printf("Idetifier size is exceeded. Maximum identifier size must be 20. \n");
            exit(EXIT_FAILURE);
        }
        /* <ctype.h> functions require a value representable as unsigned char. */
        if (identifier == NULL || !isalpha((unsigned char)identifier[0])) {
            printf("Idetifiers must begin with an alphabet. \n");
            exit(EXIT_FAILURE);
        }

        printf("Identifier(%s) \n", identifier);
        printf("Operator(:=)\n");
    }

    for (size_t i = 0; i < operator_count; i++) {
        if (strstr(str, operators[i]) != NULL)
            printf("Operator(%s) \n", operators[i]);
    }

    if (endofline)
        printf("EndOfLine\n");

    return 0;
}
/*
 * Constant-recognition pass -- not implemented yet.
 *
 * Language rules this pass is meant to cover:
 *   - Integer constants have at most 10 digits; negative values are not
 *     supported. Example token: IntConst(352)
 *   - String constants are delimited by double quotes (ASCII code 34), as in
 *     "this is a string", and have unlimited size.
 *   - A string constant cannot contain the double quote character; the string
 *     ends at the next one.
 *   - A string constant that is still open at end of file is a lexical error.
 *
 * Parameters:
 *   str        - the line to scan (currently unused)
 *   line_count - number of that line (currently unused)
 *
 * Always returns 0, so a caller that reads the result gets a defined value.
 */
int find_constants(char *str, int line_count)
{
    (void)str;
    (void)line_count;
    return 0;
}
/*
 * Operator-recognition pass -- not implemented yet.
 *
 * The valid operators of the language are: +  -  *  /  ++  --  :=
 * Example token: Operator(++)
 *
 * Parameters:
 *   str        - the line to scan (currently unused)
 *   line_count - number of that line (currently unused)
 *
 * Always returns 0, so a caller that reads the result gets a defined value.
 */
int find_operators(char *str, int line_count)
{
    (void)str;
    (void)line_count;
    return 0;
}
/*
 * Print one token name to stdout for each kind of bracket present in str.
 *
 *   (  LeftPar              )  RightPar
 *   [  LeftSquareBracket    ]  RightSquareBracket
 *   {  LeftCurlyBracket     }  RightCurlyBracket
 *
 * Each kind is reported at most once per call, in the fixed order above,
 * regardless of how often or where it occurs in the line.
 *
 * Parameters:
 *   str        - NUL-terminated line to inspect; not modified.
 *   line_count - number of that line (currently unused)
 *
 * Always returns 0.
 */
int find_brackets(char *str, int line_count)
{
    static const struct {
        char symbol;
        const char *token;
    } brackets[] = {
        {'(', "LeftPar"},
        {')', "RightPar"},
        {'[', "LeftSquareBracket"},
        {']', "RightSquareBracket"},
        {'{', "LeftCurlyBracket"},
        {'}', "RightCurlyBracket"},
    };

    (void)line_count;

    for (size_t i = 0; i < sizeof brackets / sizeof brackets[0]; i++) {
        if (strchr(str, brackets[i].symbol) != NULL)
            printf("%s\n", brackets[i].token);
    }
    return 0;
}
/*
 * Keyword-recognition pass -- not implemented yet.
 *
 * The keywords are:
 *   break, case, char, const, continue, do, else, enum, float, for, goto, if,
 *   int, long, record, return, static, while
 *
 * Ceng++ is not case sensitive and all keywords are standardized as lower
 * case: "while", "While" and "WHILE" all generate the same token.
 * Example token: Keyword(while)
 *
 * Parameters:
 *   str        - the line to scan (currently unused)
 *   line_count - number of that line (currently unused)
 *
 * Always returns 0, so a caller that reads the result gets a defined value.
 */
int find_keywords(char *str, int line_count)
{
    (void)str;
    (void)line_count;
    return 0;
}
/*
 * Token output hook -- not implemented yet; meant to write a token to the
 * generated lex file.
 *
 * Parameters:
 *   str        - token text (currently unused)
 *   line_count - line number the token came from (currently unused)
 *
 * Always returns 0, so a caller that reads the result gets a defined value.
 */
int write_token_on_lex(char *str, int line_count)
{
    (void)str;
    (void)line_count;
    return 0;
}
/*
 * Error reporting hook -- not implemented yet; meant to show an error with
 * its type and the line number it occurred on.
 *
 * Parameters:
 *   type       - description of the error kind (currently unused)
 *   line_count - line number of the error (currently unused)
 *
 * Always returns 0, so a caller that reads the result gets a defined value.
 */
int show_error(char *type, int line_count)
{
    (void)type;
    (void)line_count;
    return 0;
}
/*
 * Delete every occurrence of sub from str, in place.
 *
 * After a match is removed the search resumes at the position of that match,
 * so an occurrence that only forms across the joint with text to the left of
 * it is not removed ("a((**b" minus "(*" gives "a(*b").
 *
 * Parameters:
 *   str - NUL-terminated, writable string to edit.
 *   sub - NUL-terminated text to remove; an empty sub leaves str untouched.
 *
 * Returns str.
 */
char *strremove(char *str, const char *sub)
{
    const size_t sub_len = strlen(sub);

    if (sub_len == 0)
        return str;

    for (char *hit = strstr(str, sub); hit != NULL; hit = strstr(hit, sub)) {
        const char *tail = hit + sub_len;

        /* Source and destination overlap, hence memmove; +1 carries the NUL. */
        memmove(hit, tail, strlen(tail) + 1);
    }
    return str;
}