cprover
Toggle main menu visibility
Loading...
Searching...
No Matches
format_strings.cpp
Go to the documentation of this file.
1
/*******************************************************************\
2
3
Module: Format String Parser
4
5
Author: CM Wintersteiger
6
7
\*******************************************************************/
8
11
12
#include "
format_strings.h
"
13
14
#include <
util/c_types.h
>
15
#include <
util/exception_utils.h
>
16
#include <
util/invariant.h
>
17
#include <
util/std_expr.h
>
18
19
#include <cctype>
20
21
void
parse_flags
(std::string::const_iterator &it,
format_tokent
&curtok)
22
{
23
while
(*it ==
'#'
|| *it ==
'0'
|| *it ==
'-'
|| *it ==
' '
|| *it ==
'+'
)
24
{
25
switch
(*it)
26
{
27
case
'#'
:
28
curtok.
flags
.push_back(
format_tokent::flag_typet::ALTERNATE
);
29
break
;
30
case
'0'
:
31
curtok.
flags
.push_back(
format_tokent::flag_typet::ZERO_PAD
);
32
break
;
33
case
'-'
:
34
curtok.
flags
.push_back(
format_tokent::flag_typet::LEFT_ADJUST
);
35
break
;
36
case
' '
:
37
curtok.
flags
.push_back(
format_tokent::flag_typet::SIGNED_SPACE
);
38
break
;
39
case
'+'
:
40
curtok.
flags
.push_back(
format_tokent::flag_typet::SIGN
);
41
break
;
42
default
:
43
throw
unsupported_operation_exceptiont
(
44
std::string(
"unsupported format specifier flag: '"
) + *it +
"'"
);
45
}
46
it++;
47
}
48
}
49
50
void
parse_field_width
(std::string::const_iterator &it,
format_tokent
&curtok)
51
{
52
if
(*it ==
'*'
)
53
{
54
curtok.
flags
.push_back(
format_tokent::flag_typet::ASTERISK
);
55
it++;
56
}
57
58
std::string tmp;
59
for
(; isdigit(*it); it++)
60
tmp += *it;
61
curtok.
field_width
=
string2integer
(tmp);
62
}
63
64
void
parse_precision
(std::string::const_iterator &it,
format_tokent
&curtok)
65
{
66
if
(*it ==
'.'
)
67
{
68
it++;
69
70
if
(*it ==
'*'
)
71
{
72
curtok.
flags
.push_back(
format_tokent::flag_typet::ASTERISK
);
73
it++;
74
}
75
else
76
{
77
std::string tmp;
78
for
(; isdigit(*it); it++)
79
tmp += *it;
80
curtok.
precision
=
string2integer
(tmp);
81
}
82
}
83
}
84
85
void
parse_length_modifier
(
86
std::string::const_iterator &it,
87
format_tokent
&curtok)
88
{
89
if
(*it ==
'h'
)
90
{
91
it++;
92
if
(*it ==
'h'
)
93
it++;
94
curtok.
length_modifier
=
format_tokent::length_modifierst::LEN_h
;
95
}
96
else
if
(*it ==
'l'
)
97
{
98
it++;
99
if
(*it ==
'l'
)
100
it++;
101
curtok.
length_modifier
=
format_tokent::length_modifierst::LEN_l
;
102
}
103
else
if
(*it ==
'L'
)
104
{
105
it++;
106
curtok.
length_modifier
=
format_tokent::length_modifierst::LEN_L
;
107
}
108
else
if
(*it ==
'j'
)
109
{
110
it++;
111
curtok.
length_modifier
=
format_tokent::length_modifierst::LEN_j
;
112
}
113
else
if
(*it ==
't'
)
114
{
115
it++;
116
curtok.
length_modifier
=
format_tokent::length_modifierst::LEN_L
;
117
}
118
}
119
120
void
parse_conversion_specifier
(
121
const
std::string &arg_string,
122
std::string::const_iterator &it,
123
format_tokent
&curtok)
124
{
125
switch
(*it)
126
{
127
case
'd'
:
128
case
'i'
:
129
curtok.
type
=
format_tokent::token_typet::INT
;
130
curtok.
representation
=
format_tokent::representationt::SIGNED_DEC
;
131
break
;
132
case
'o'
:
133
curtok.
type
=
format_tokent::token_typet::INT
;
134
curtok.
representation
=
format_tokent::representationt::UNSIGNED_OCT
;
135
break
;
136
case
'u'
:
137
curtok.
type
=
format_tokent::token_typet::INT
;
138
curtok.
representation
=
format_tokent::representationt::UNSIGNED_DEC
;
139
break
;
140
case
'x'
:
141
case
'X'
:
142
curtok.
type
=
format_tokent::token_typet::INT
;
143
curtok.
representation
=
format_tokent::representationt::UNSIGNED_HEX
;
144
break
;
145
case
'e'
:
146
case
'E'
:
147
curtok.
type
=
format_tokent::token_typet::FLOAT
;
148
break
;
149
case
'f'
:
150
case
'F'
:
151
curtok.
type
=
format_tokent::token_typet::FLOAT
;
152
break
;
153
case
'g'
:
154
case
'G'
:
155
curtok.
type
=
format_tokent::token_typet::FLOAT
;
156
break
;
157
case
'a'
:
158
case
'A'
:
159
curtok.
type
=
format_tokent::token_typet::FLOAT
;
160
break
;
161
case
'c'
:
162
curtok.
type
=
format_tokent::token_typet::CHAR
;
163
break
;
164
case
's'
:
165
curtok.
type
=
format_tokent::token_typet::STRING
;
166
break
;
167
case
'p'
:
168
curtok.
type
=
format_tokent::token_typet::POINTER
;
169
break
;
170
case
'%'
:
171
curtok.
type
=
format_tokent::token_typet::TEXT
;
172
curtok.
value
=
"%"
;
173
break
;
174
case
'['
:
// pattern matching in, e.g., fscanf.
175
{
176
std::string tmp;
177
it++;
178
if
(*it ==
'^'
)
// if it's there, it must be first
179
{
180
tmp +=
'^'
;
181
it++;
182
if
(*it ==
']'
)
// if it's there, it must be here
183
{
184
tmp +=
']'
;
185
it++;
186
}
187
}
188
189
for
(; it != arg_string.end() && *it !=
']'
; it++)
190
tmp += *it;
191
192
break
;
193
}
194
195
default
:
196
throw
unsupported_operation_exceptiont
(
197
std::string(
"unsupported format conversion specifier: '"
) + *it +
"'"
);
198
}
199
it++;
200
}
201
202
format_token_listt
parse_format_string
(
const
std::string &arg_string)
203
{
204
format_token_listt
token_list;
205
206
std::string::const_iterator it = arg_string.begin();
207
208
while
(it != arg_string.end())
209
{
210
if
(*it ==
'%'
)
211
{
212
token_list.push_back(
format_tokent
());
213
format_tokent
&curtok = token_list.back();
214
it++;
215
216
parse_flags
(it, curtok);
217
parse_field_width
(it, curtok);
218
parse_precision
(it, curtok);
219
parse_length_modifier
(it, curtok);
220
parse_conversion_specifier
(arg_string, it, curtok);
221
}
222
else
223
{
224
if
(
225
token_list.empty() ||
226
token_list.back().type !=
format_tokent::token_typet::TEXT
)
227
token_list.push_back(
format_tokent
(
format_tokent::token_typet::TEXT
));
228
229
std::string tmp;
230
for
(; it != arg_string.end() && *it !=
'%'
; it++)
231
tmp += *it;
232
233
INVARIANT
(
234
!token_list.empty() &&
235
token_list.back().type ==
format_tokent::token_typet::TEXT
,
236
"must already have a TEXT token at the back of the token list"
);
237
238
token_list.back().value = tmp;
239
}
240
}
241
242
return
token_list;
243
}
244
245
std::optional<typet>
get_type
(
const
format_tokent
&token)
246
{
247
switch
(token.
type
)
248
{
249
case
format_tokent::token_typet::INT
:
250
switch
(token.
length_modifier
)
251
{
252
case
format_tokent::length_modifierst::LEN_h
:
253
if
(token.
representation
==
format_tokent::representationt::SIGNED_DEC
)
254
return
signed_char_type
();
255
else
256
return
unsigned_char_type
();
257
258
case
format_tokent::length_modifierst::LEN_hh
:
259
if
(token.
representation
==
format_tokent::representationt::SIGNED_DEC
)
260
return
signed_short_int_type
();
261
else
262
return
unsigned_short_int_type
();
263
264
case
format_tokent::length_modifierst::LEN_l
:
265
if
(token.
representation
==
format_tokent::representationt::SIGNED_DEC
)
266
return
signed_long_int_type
();
267
else
268
return
unsigned_long_int_type
();
269
270
case
format_tokent::length_modifierst::LEN_ll
:
271
if
(token.
representation
==
format_tokent::representationt::SIGNED_DEC
)
272
return
signed_long_long_int_type
();
273
else
274
return
unsigned_long_long_int_type
();
275
276
case
format_tokent::length_modifierst::LEN_t
:
277
case
format_tokent::length_modifierst::LEN_j
:
278
case
format_tokent::length_modifierst::LEN_L
:
279
case
format_tokent::length_modifierst::LEN_undef
:
280
if
(token.
representation
==
format_tokent::representationt::SIGNED_DEC
)
281
return
signed_int_type
();
282
else
283
return
unsigned_int_type
();
284
}
285
286
case
format_tokent::token_typet::FLOAT
:
287
switch
(token.
length_modifier
)
288
{
289
case
format_tokent::length_modifierst::LEN_l
:
290
return
double_type
();
291
case
format_tokent::length_modifierst::LEN_L
:
292
return
long_double_type
();
293
case
format_tokent::length_modifierst::LEN_h
:
294
case
format_tokent::length_modifierst::LEN_hh
:
295
case
format_tokent::length_modifierst::LEN_j
:
296
case
format_tokent::length_modifierst::LEN_ll
:
297
case
format_tokent::length_modifierst::LEN_t
:
298
case
format_tokent::length_modifierst::LEN_undef
:
299
return
float_type
();
300
}
301
302
case
format_tokent::token_typet::CHAR
:
303
switch
(token.
length_modifier
)
304
{
305
case
format_tokent::length_modifierst::LEN_l
:
306
return
wchar_t_type
();
307
case
format_tokent::length_modifierst::LEN_h
:
308
case
format_tokent::length_modifierst::LEN_hh
:
309
case
format_tokent::length_modifierst::LEN_j
:
310
case
format_tokent::length_modifierst::LEN_L
:
311
case
format_tokent::length_modifierst::LEN_ll
:
312
case
format_tokent::length_modifierst::LEN_t
:
313
case
format_tokent::length_modifierst::LEN_undef
:
314
return
char_type
();
315
}
316
317
case
format_tokent::token_typet::POINTER
:
318
return
pointer_type
(
void_type
());
319
320
case
format_tokent::token_typet::STRING
:
321
switch
(token.
length_modifier
)
322
{
323
case
format_tokent::length_modifierst::LEN_l
:
324
return
array_typet
(
wchar_t_type
(),
nil_exprt
());
325
case
format_tokent::length_modifierst::LEN_h
:
326
case
format_tokent::length_modifierst::LEN_hh
:
327
case
format_tokent::length_modifierst::LEN_j
:
328
case
format_tokent::length_modifierst::LEN_L
:
329
case
format_tokent::length_modifierst::LEN_ll
:
330
case
format_tokent::length_modifierst::LEN_t
:
331
case
format_tokent::length_modifierst::LEN_undef
:
332
return
array_typet
(
char_type
(),
nil_exprt
());
333
}
334
335
case
format_tokent::token_typet::TEXT
:
336
case
format_tokent::token_typet::UNKNOWN
:
337
return
{};
338
}
339
340
UNREACHABLE
;
341
}
float_type
floatbv_typet float_type()
Definition
c_types.cpp:177
signed_long_int_type
signedbv_typet signed_long_int_type()
Definition
c_types.cpp:72
signed_char_type
signedbv_typet signed_char_type()
Definition
c_types.cpp:134
unsigned_int_type
unsignedbv_typet unsigned_int_type()
Definition
c_types.cpp:36
unsigned_long_long_int_type
unsignedbv_typet unsigned_long_long_int_type()
Definition
c_types.cpp:93
unsigned_long_int_type
unsignedbv_typet unsigned_long_int_type()
Definition
c_types.cpp:86
void_type
empty_typet void_type()
Definition
c_types.cpp:245
signed_int_type
signedbv_typet signed_int_type()
Definition
c_types.cpp:22
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition
c_types.cpp:235
unsigned_char_type
unsignedbv_typet unsigned_char_type()
Definition
c_types.cpp:127
char_type
bitvector_typet char_type()
Definition
c_types.cpp:106
signed_long_long_int_type
signedbv_typet signed_long_long_int_type()
Definition
c_types.cpp:79
wchar_t_type
bitvector_typet wchar_t_type()
Definition
c_types.cpp:141
long_double_type
floatbv_typet long_double_type()
Definition
c_types.cpp:193
double_type
floatbv_typet double_type()
Definition
c_types.cpp:185
signed_short_int_type
signedbv_typet signed_short_int_type()
Definition
c_types.cpp:29
unsigned_short_int_type
unsignedbv_typet unsigned_short_int_type()
Definition
c_types.cpp:43
c_types.h
array_typet
Arrays with given size.
Definition
std_types.h:807
format_tokent
Definition
format_strings.h:25
format_tokent::precision
mp_integer precision
Definition
format_strings.h:85
format_tokent::type
token_typet type
Definition
format_strings.h:82
format_tokent::field_width
mp_integer field_width
Definition
format_strings.h:84
format_tokent::representation
representationt representation
Definition
format_strings.h:87
format_tokent::flag_typet::ALTERNATE
@ ALTERNATE
Definition
format_strings.h:40
format_tokent::flag_typet::SIGN
@ SIGN
Definition
format_strings.h:44
format_tokent::flag_typet::ASTERISK
@ ASTERISK
Definition
format_strings.h:45
format_tokent::flag_typet::ZERO_PAD
@ ZERO_PAD
Definition
format_strings.h:41
format_tokent::flag_typet::LEFT_ADJUST
@ LEFT_ADJUST
Definition
format_strings.h:42
format_tokent::flag_typet::SIGNED_SPACE
@ SIGNED_SPACE
Definition
format_strings.h:43
format_tokent::token_typet::CHAR
@ CHAR
Definition
format_strings.h:33
format_tokent::token_typet::INT
@ INT
Definition
format_strings.h:31
format_tokent::token_typet::TEXT
@ TEXT
Definition
format_strings.h:30
format_tokent::token_typet::STRING
@ STRING
Definition
format_strings.h:34
format_tokent::token_typet::UNKNOWN
@ UNKNOWN
Definition
format_strings.h:29
format_tokent::token_typet::POINTER
@ POINTER
Definition
format_strings.h:35
format_tokent::token_typet::FLOAT
@ FLOAT
Definition
format_strings.h:32
format_tokent::length_modifier
length_modifierst length_modifier
Definition
format_strings.h:86
format_tokent::length_modifierst::LEN_ll
@ LEN_ll
Definition
format_strings.h:54
format_tokent::length_modifierst::LEN_l
@ LEN_l
Definition
format_strings.h:53
format_tokent::length_modifierst::LEN_h
@ LEN_h
Definition
format_strings.h:51
format_tokent::length_modifierst::LEN_t
@ LEN_t
Definition
format_strings.h:57
format_tokent::length_modifierst::LEN_undef
@ LEN_undef
Definition
format_strings.h:50
format_tokent::length_modifierst::LEN_hh
@ LEN_hh
Definition
format_strings.h:52
format_tokent::length_modifierst::LEN_j
@ LEN_j
Definition
format_strings.h:56
format_tokent::length_modifierst::LEN_L
@ LEN_L
Definition
format_strings.h:55
format_tokent::flags
std::list< flag_typet > flags
Definition
format_strings.h:83
format_tokent::representationt::UNSIGNED_DEC
@ UNSIGNED_DEC
Definition
format_strings.h:64
format_tokent::representationt::UNSIGNED_HEX
@ UNSIGNED_HEX
Definition
format_strings.h:66
format_tokent::representationt::UNSIGNED_OCT
@ UNSIGNED_OCT
Definition
format_strings.h:65
format_tokent::representationt::SIGNED_DEC
@ SIGNED_DEC
Definition
format_strings.h:63
format_tokent::value
irep_idt value
Definition
format_strings.h:88
nil_exprt
The NIL expression.
Definition
std_expr.h:3134
unsupported_operation_exceptiont
Thrown when we encounter an instruction, parameters to an instruction etc.
Definition
exception_utils.h:145
exception_utils.h
parse_field_width
void parse_field_width(std::string::const_iterator &it, format_tokent &curtok)
Definition
format_strings.cpp:50
parse_conversion_specifier
void parse_conversion_specifier(const std::string &arg_string, std::string::const_iterator &it, format_tokent &curtok)
Definition
format_strings.cpp:120
parse_flags
void parse_flags(std::string::const_iterator &it, format_tokent &curtok)
Definition
format_strings.cpp:21
parse_length_modifier
void parse_length_modifier(std::string::const_iterator &it, format_tokent &curtok)
Definition
format_strings.cpp:85
parse_format_string
format_token_listt parse_format_string(const std::string &arg_string)
Definition
format_strings.cpp:202
parse_precision
void parse_precision(std::string::const_iterator &it, format_tokent &curtok)
Definition
format_strings.cpp:64
get_type
std::optional< typet > get_type(const format_tokent &token)
Definition
format_strings.cpp:245
format_strings.h
Format String Parser.
format_token_listt
std::list< format_tokent > format_token_listt
Definition
format_strings.h:91
string2integer
const mp_integer string2integer(const std::string &n, unsigned base)
Definition
mp_arith.cpp:54
invariant.h
UNREACHABLE
#define UNREACHABLE
This should be used to mark dead code.
Definition
invariant.h:525
INVARIANT
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition
invariant.h:423
std_expr.h
API to expression classes.
ansi-c
goto-conversion
format_strings.cpp
Generated by
1.17.0