cprover
Toggle main menu visibility
Loading...
Searching...
No Matches
mini_c_parser.cpp
Go to the documentation of this file.
1
/*******************************************************************\
2
3
Module: Mini C Parser
4
5
Author: Daniel Kroening, dkr@amazon.com
6
7
\*******************************************************************/
8
11
12
#include "
mini_c_parser.h
"
13
14
#include <
util/exception_utils.h
>
15
#include <
util/invariant.h
>
16
17
#include "
cscanner.h
"
18
19
class
mini_c_parsert
20
{
21
public
:
22
mini_c_parsert
()
23
{
24
}
25
26
c_translation_unitt
parse
(std::istream &);
27
28
protected
:
29
std::size_t
token_index
;
30
using
tokenst
= std::vector<ctokent>;
31
tokenst
tokens
;
32
33
bool
eof
()
const
34
{
35
return
is_eof
(
peek
());
36
}
37
38
c_declarationt
parse_declaration
();
39
tokenst
parse_pre_declarator
();
40
tokenst
parse_declarator
();
41
tokenst
parse_post_declarator
();
42
tokenst
parse_initializer
();
43
44
const
ctokent
&
peek
()
const
45
{
46
PRECONDITION
(
token_index
<
tokens
.size());
47
return
tokens
[
token_index
];
48
}
49
50
const
ctokent
&
peek
(std::size_t how_many)
const
51
{
52
PRECONDITION
(
token_index
+ how_many <
tokens
.size());
53
return
tokens
[
token_index
+ how_many];
54
}
55
56
const
ctokent
&
consume_token
()
57
{
58
PRECONDITION
(
token_index
<
tokens
.size());
59
PRECONDITION
(!
is_eof
(
tokens
[
token_index
]));
60
return
tokens
[
token_index
++];
61
}
62
63
static
bool
is_storage_class
(
const
ctokent
&token)
64
{
65
return
token ==
"auto"
|| token ==
"extern"
|| token ==
"static"
||
66
token ==
"register"
|| token ==
"_Thread_local"
;
67
}
68
69
static
bool
is_type_qualifier
(
const
ctokent
&token)
70
{
71
return
token ==
"const"
|| token ==
"volatile"
|| token ==
"restrict"
||
72
token ==
"_Atomic"
;
73
}
74
75
void
skip_ws
(
tokenst
&);
76
void
parse_brackets
(
char
open,
char
close,
tokenst
&dest);
77
};
78
79
std::ostream &
operator<<
(std::ostream &out,
const
c_declarationt
&declaration)
80
{
81
for
(
const
auto
&t : declaration.
pre_declarator
)
82
out << t.text;
83
84
for
(
const
auto
&t : declaration.
declarator
)
85
out << t.text;
86
87
for
(
const
auto
&t : declaration.
post_declarator
)
88
out << t.text;
89
90
for
(
const
auto
&t : declaration.
initializer
)
91
out << t.text;
92
93
return
out;
94
}
95
96
void
c_declarationt::print
(std::ostream &out)
const
97
{
98
if
(!
declarator
.empty())
99
{
100
out <<
"DECLARATOR: "
;
101
for
(
const
auto
&t :
declarator
)
102
out << t.text;
103
out <<
'\n'
;
104
}
105
}
106
107
bool
c_declarationt::is_function
()
const
108
{
109
return
!
post_declarator
.empty() &&
post_declarator
.front() ==
'('
;
110
}
111
112
bool
c_declarationt::has_body
()
const
113
{
114
return
!
initializer
.empty() &&
initializer
.front() ==
'{'
;
115
}
116
117
std::optional<ctokent>
c_declarationt::declared_identifier
()
const
118
{
119
for
(
auto
&t :
declarator
)
120
if
(
is_identifier
(t))
121
return
t;
122
return
{};
123
}
124
125
/// Moves past whitespace, comments and preprocessor directives.
/// The skipped tokens are not dropped: each one is appended to \p dest.
void mini_c_parsert::skip_ws(tokenst &dest)
{
  if(eof())
    return;

  const auto is_skippable = [](const ctokent &t) {
    return is_ws(t) || is_comment(t) || is_preprocessor_directive(t);
  };

  while(is_skippable(peek()))
    dest.push_back(consume_token());
}
136
137
void
mini_c_parsert::parse_brackets
(
char
open,
char
close,
tokenst
&dest)
138
{
139
if
(
eof
() ||
peek
() != open)
140
return
;
141
142
std::size_t bracket_count = 0;
143
while
(
true
)
144
{
145
if
(
eof
())
146
throw
invalid_input_exceptiont
(
"expected "
+ std::string(1, close));
147
148
auto
&token =
consume_token
();
149
dest.push_back(token);
150
if
(token == open)
151
bracket_count++;
152
else
if
(token == close)
153
{
154
bracket_count--;
155
if
(bracket_count == 0)
156
break
;
// done
157
}
158
}
159
}
160
161
/// Consumes everything that precedes the declarator of a declaration:
/// type qualifiers, storage classes, built-in type keywords, '*',
/// enum/struct/union specifiers (with optional tag and body),
/// __attribute__((...)) and identifiers that are judged to be type names.
/// Whitespace, comments and preprocessor directives in between are kept.
/// \return the consumed tokens, in order
/// \throws invalid_source_file_exceptiont when a token is met that cannot be
///   part of a declaration
/// \throws invalid_input_exceptiont when a bracket group is not closed before
///   end-of-file (raised by parse_brackets)
mini_c_parsert::tokenst mini_c_parsert::parse_pre_declarator()
{
  // type qualifier
  // storage class
  // type
  // '*'
  tokenst result;

  while(true)
  {
    skip_ws(result);

    if(eof())
      return result;

    auto &token = peek();

    // 'void' and '_Bool' are listed explicitly: without that they would be
    // subject to the identifier look-ahead below, which takes an identifier
    // followed by '(' to be the declarator, e.g. in 'void (*fp)(void);'.
    if(
      is_type_qualifier(token) || is_storage_class(token) || token == '*' ||
      token == "void" || token == "_Bool" || token == "int" ||
      token == "signed" || token.text == "unsigned" || token == "char" ||
      token == "short" || token == "long" || token == "float" ||
      token == "double" || token == "inline" || token == "typedef")
    {
      result.push_back(consume_token());
    }
    else if(token == "enum" || token == "struct" || token == "union")
    {
      result.push_back(consume_token());

      skip_ws(result);

      // may be followed by a tag
      if(!eof() && is_identifier(peek()))
        result.push_back(consume_token());

      skip_ws(result);

      // may be followed by a body {...}
      parse_brackets('{', '}', result);
    }
    else if(token == "__attribute__")
    {
      result.push_back(consume_token());
      skip_ws(result);
      // followed by (( ... ))
      parse_brackets('(', ')', result);
    }
    else if(is_identifier(token))
    {
      // Might be typedef or the declarator.
      // We look ahead for the next non-WS token to tell the difference.
      std::size_t index = 1;
      while(true)
      {
        const auto &next_token = peek(index);
        if(
          is_ws(next_token) || is_preprocessor_directive(next_token) ||
          is_comment(next_token))
          index++;
        else
          break;
      }

      auto &next_token = peek(index);
      if(!is_identifier(next_token) && next_token != '*')
      {
        // 'token' is the declarator
        return result;
      }
      else
        result.push_back(consume_token()); // it's a type
    }
    else if(token == ';')
      return result;
    else if(token == '(') // function type, part of declarator
      return result;
    else
    {
      source_locationt loc;
      loc.set_line(token.line_number);
      throw invalid_source_file_exceptiont(
        "expected a declaration but got '" + token.text + "'", loc);
    }
  }
}
247
248
/// Consumes the declarator, which is either a single identifier or a
/// parenthesised group such as "(*name)".
/// \return the declarator tokens; empty at end-of-file or when the next
///   token is ';'
/// \throws invalid_source_file_exceptiont when the next token is neither an
///   identifier nor '('
mini_c_parsert::tokenst mini_c_parsert::parse_declarator()
{
  // symbol
  // ((...* symbol ...))

  if(eof() || peek() == ';')
    return {};

  const ctokent &next = peek();

  if(next == '(')
  {
    tokenst bracketed;
    parse_brackets('(', ')', bracketed);
    return bracketed;
  }

  if(is_identifier(next))
  {
    tokenst single;
    single.push_back(consume_token());
    return single;
  }

  source_locationt loc;
  loc.set_line(next.line_number);
  throw invalid_source_file_exceptiont("expected an identifier", loc);
}
276
277
/// Consumes what follows the declarator, up to but excluding the first
/// ';', '{' or '=' that is not nested inside parentheses.
/// \return the consumed tokens; parsing also stops at end-of-file
mini_c_parsert::tokenst mini_c_parsert::parse_post_declarator()
{
  tokenst collected;
  std::size_t nesting = 0; // number of '(' not yet matched by ')'

  while(!eof())
  {
    const ctokent &next = peek();

    if(next == '(')
      ++nesting;
    else if(nesting > 0)
    {
      // inside parentheses everything is taken verbatim
      if(next == ')')
        --nesting;
    }
    else if(next == ';' || next == '{' || next == '=')
    {
      // end of declaration, function body, or initializer
      break;
    }

    collected.push_back(consume_token());
  }

  return collected;
}
312
313
/// Consumes the tail of a declaration, which takes one of three forms:
///  - "= ... ;"  an initializer, taken up to and including the first ';'
///  - ";"        the end of the declaration
///  - "{ ... }"  a brace-balanced body
/// \return the consumed tokens; empty at end-of-file
/// \throws invalid_input_exceptiont when end-of-file is reached inside an
///   initializer or a body
/// Any other next token violates the precondition of this function.
mini_c_parsert::tokenst mini_c_parsert::parse_initializer()
{
  if(eof())
    return {};

  tokenst result;

  if(peek() == '=')
  {
    for(;;)
    {
      if(eof())
        throw invalid_input_exceptiont("expected an initializer");

      const ctokent &current = consume_token();
      result.push_back(current);

      if(current == ';')
        return result;
    }
  }

  if(peek() == ';')
  {
    // done
    result.push_back(consume_token());
    return result;
  }

  if(peek() == '{')
  {
    // function body
    std::size_t depth = 0;

    for(;;)
    {
      if(eof())
        throw invalid_input_exceptiont("eof in function body");

      const ctokent &current = consume_token();
      result.push_back(current);

      if(current == '{')
        ++depth;
      else if(current == '}' && --depth == 0)
        return result;
    }
  }

  PRECONDITION(false);
}
359
360
/// Parses one declaration by running the four sub-parsers in source order.
/// The order of the calls matters, as each one continues where the previous
/// one stopped in the token stream.
c_declarationt mini_c_parsert::parse_declaration()
{
  c_declarationt declaration;

  declaration.pre_declarator = parse_pre_declarator();
  declaration.declarator = parse_declarator();
  declaration.post_declarator = parse_post_declarator();
  declaration.initializer = parse_initializer();

  return declaration;
}
371
372
/// Tokenizes \p in and splits the token stream into declarations.
/// \return the declarations in source order; empty when the scanner yields
///   no tokens at all
c_translation_unitt mini_c_parsert::parse(std::istream &in)
{
  cscannert scanner(in);
  // NOTE(review): presumably makes the scanner hand out whitespace and
  // comments as tokens; the sub-parsers do handle such tokens.
  scanner.return_WS_and_comments = true;
  tokens = scanner.get_tokens();
  token_index = 0;

  c_translation_unitt translation_unit;

  if(!tokens.empty())
  {
    DATA_INVARIANT(is_eof(tokens.back()), "token stream must end on eof");

    while(!eof())
      translation_unit.push_back(parse_declaration());
  }

  return translation_unit;
}
391
392
/// Convenience entry point: parses the C source read from \p in with a
/// fresh mini_c_parsert.
/// \return the declarations found in the input
c_translation_unitt parse_c(std::istream &in)
{
  mini_c_parsert parser;
  return parser.parse(in);
}
cscannert
Definition
cscanner.h:21
cscannert::return_WS_and_comments
bool return_WS_and_comments
Definition
cscanner.h:31
cscannert::get_tokens
std::vector< ctokent > get_tokens()
Definition
cscanner.cpp:41
ctokent
Definition
ctoken.h:19
invalid_input_exceptiont
Thrown when user-provided input cannot be processed.
Definition
exception_utils.h:163
invalid_source_file_exceptiont
Thrown when we can't handle something in an input source file.
Definition
exception_utils.h:172
mini_c_parsert
Definition
mini_c_parser.cpp:20
mini_c_parsert::parse_declarator
tokenst parse_declarator()
Definition
mini_c_parser.cpp:248
mini_c_parsert::mini_c_parsert
mini_c_parsert()
Definition
mini_c_parser.cpp:22
mini_c_parsert::parse_brackets
void parse_brackets(char open, char close, tokenst &dest)
Definition
mini_c_parser.cpp:137
mini_c_parsert::peek
const ctokent & peek(std::size_t how_many) const
Definition
mini_c_parser.cpp:50
mini_c_parsert::parse
c_translation_unitt parse(std::istream &)
Definition
mini_c_parser.cpp:372
mini_c_parsert::skip_ws
void skip_ws(tokenst &)
Definition
mini_c_parser.cpp:125
mini_c_parsert::token_index
std::size_t token_index
Definition
mini_c_parser.cpp:29
mini_c_parsert::peek
const ctokent & peek() const
Definition
mini_c_parser.cpp:44
mini_c_parsert::is_storage_class
static bool is_storage_class(const ctokent &token)
Definition
mini_c_parser.cpp:63
mini_c_parsert::parse_post_declarator
tokenst parse_post_declarator()
Definition
mini_c_parser.cpp:277
mini_c_parsert::tokens
tokenst tokens
Definition
mini_c_parser.cpp:31
mini_c_parsert::parse_pre_declarator
tokenst parse_pre_declarator()
Definition
mini_c_parser.cpp:161
mini_c_parsert::tokenst
std::vector< ctokent > tokenst
Definition
mini_c_parser.cpp:30
mini_c_parsert::eof
bool eof() const
Definition
mini_c_parser.cpp:33
mini_c_parsert::parse_declaration
c_declarationt parse_declaration()
Definition
mini_c_parser.cpp:360
mini_c_parsert::parse_initializer
tokenst parse_initializer()
Definition
mini_c_parser.cpp:313
mini_c_parsert::consume_token
const ctokent & consume_token()
Definition
mini_c_parser.cpp:56
mini_c_parsert::is_type_qualifier
static bool is_type_qualifier(const ctokent &token)
Definition
mini_c_parser.cpp:69
source_locationt
Definition
source_location.h:20
source_locationt::set_line
void set_line(const irep_idt &line)
Definition
source_location.h:114
cscanner.h
cscanner
is_comment
static bool is_comment(const ctokent &t)
Definition
ctoken.h:93
is_preprocessor_directive
static bool is_preprocessor_directive(const ctokent &t)
Definition
ctoken.h:98
is_ws
static bool is_ws(const ctokent &t)
Definition
ctoken.h:83
is_eof
static bool is_eof(const ctokent &t)
Definition
ctoken.h:88
exception_utils.h
operator<<
std::ostream & operator<<(std::ostream &out, const c_declarationt &declaration)
Definition
mini_c_parser.cpp:79
parse_c
c_translation_unitt parse_c(std::istream &in)
Definition
mini_c_parser.cpp:392
mini_c_parser.h
Mini C Parser.
c_translation_unitt
std::vector< c_declarationt > c_translation_unitt
Definition
mini_c_parser.h:37
is_identifier
static bool is_identifier(int token)
Definition
parse.cpp:421
invariant.h
DATA_INVARIANT
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition
invariant.h:534
PRECONDITION
#define PRECONDITION(CONDITION)
Definition
invariant.h:463
c_declarationt
Definition
mini_c_parser.h:22
c_declarationt::has_body
bool has_body() const
Definition
mini_c_parser.cpp:112
c_declarationt::declared_identifier
std::optional< ctokent > declared_identifier() const
Definition
mini_c_parser.cpp:117
c_declarationt::is_function
bool is_function() const
Definition
mini_c_parser.cpp:107
c_declarationt::print
void print(std::ostream &) const
Definition
mini_c_parser.cpp:96
c_declarationt::post_declarator
tokenst post_declarator
Definition
mini_c_parser.h:28
c_declarationt::initializer
tokenst initializer
Definition
mini_c_parser.h:29
c_declarationt::declarator
tokenst declarator
Definition
mini_c_parser.h:27
c_declarationt::pre_declarator
tokenst pre_declarator
Definition
mini_c_parser.h:26
crangler
mini_c_parser.cpp
Generated by
1.17.0