Describe the bug
I wanted to write a program that required a YAML parser, so I decided to use prantlf.yaml . This repository hasn't been updated in a while, but the code looks pretty valid, except for the directives with @VROOT instead of @VMODROOT (I already submitted a PR with a fix, but @prantlf hasn't responded yet).
An old issue that hadn't been answered caught my attention. I decided to investigate the bug myself and submit another PR, but eventually, I debugged it and discovered that the problem most likely lies outside the prantlf.yaml package, although it only occurs in code that's complex enough that I can't reproduce it with a more minimalistic example.
Please be careful and patient while reading the following.
Reproduction Steps
Install the prantlf.yaml package, manually change all references to @VROOT to @VMODROOT in the prantlf/yaml/src/yaml.c.v file, and then run the following code:
module main
import prantlf.yaml
fn main () {
input := r 'answer:
- ans: 1
- ans: 2'
a := yaml.parse_text (input)!
println (a)
}
Expected Behavior
{answer:[{ans:1},{ans:2}]}
Current Behavior
Possible Solution
No response
Additional Information/Context
The YAML code from the example above is an object (or mapping in YAML terms), which contains one array (or sequence in YAML terms), which contains two more objects , each with a single field with the values 1 and 2, respectively.
The following two functions are responsible for parsing objects and arrays in prantlf.yaml:
fn parse_object (parser & C.yaml_parser_t) ! Any {
buf := []u8 {len: int (sizeof (C.yaml_event_t))}
event := & C.yaml_event_t (& buf)
mut object := map [string ]Any{}
mut is_key := false
mut key := ''
for {
if C.yaml_parser_parse (parser, event) == 0 {
return fail_parse (parser)
}
defer {
C.yaml_event_delete (event)
}
match event.@type {
C.YAML_MAPPING_START_EVENT {
if is_key {
object[key] = parse_object (parser)!
is_key = false
} else {
return fail_decode ('key expected' , event)
}
}
C.YAML_SEQUENCE_START_EVENT {
if is_key {
object[key] = parse_array (parser)!
is_key = false
} else {
return fail_decode ('key expected' , event)
}
}
C.YAML_SCALAR_EVENT {
if is_key {
object[key] = parse_value (parser, event)!
is_key = false
} else {
key = parse_string (parser, event)!
is_key = true
}
}
C.YAML_MAPPING_END_EVENT {
return object
}
C.YAML_NO_EVENT, C.YAML_STREAM_START_EVENT, C.YAML_STREAM_END_EVENT,
C.YAML_DOCUMENT_START_EVENT, C.YAML_DOCUMENT_END_EVENT {
return fail_decode ('unexpected yaml event ${event.@type} ' , event)
}
else {
return fail_decode ('unrecognised yaml event ${event.@type} ' , event)
}
}
}
panic ('unreachable code' )
}
fn parse_array (parser & C.yaml_parser_t) ! Any {
buf := []u8 {len: int (sizeof (C.yaml_event_t))}
event := & C.yaml_event_t (& buf)
mut array := []Any{}
for {
if C.yaml_parser_parse (parser, event) == 0 {
return fail_parse (parser)
}
defer {
C.yaml_event_delete (event)
}
match event.@type {
C.YAML_MAPPING_START_EVENT {
array << parse_object (parser)!
}
C.YAML_SEQUENCE_START_EVENT {
array << parse_array (parser)!
}
C.YAML_SCALAR_EVENT {
array << parse_value (parser, event)!
}
C.YAML_SEQUENCE_END_EVENT {
return array
}
C.YAML_NO_EVENT, C.YAML_STREAM_START_EVENT, C.YAML_STREAM_END_EVENT,
C.YAML_DOCUMENT_START_EVENT, C.YAML_DOCUMENT_END_EVENT {
return fail_decode ('unexpected yaml event ${event.@type} ' , event)
}
else {
return fail_decode ('unrecognised yaml event ${event.@type} ' , event)
}
}
}
panic ('unreachable code' )
}
In the loop for parsing the answer array (in the parse_array function), the parser receives two YAML_MAPPING_START_EVENT events, and the parse_object function is called for each. The error occurs because the first element of the array is reset to zeroes during the call to parse_object for the second object ({ans:2}). Why would this happen if neither the object nor any memory associated with it is passed to the parse_object function?
Debugging this further, I discovered that the zeroing occurs during the external call C.yaml_parser_parse(parser, event), which simply fills the event structure passed in the second argument with data. Strange, right? I assumed that the memory allocated for this structure somehow overlaps with the array, and I was right.
I ran the following commands to get the C code and debug it:
$ v -cc gcc -o foo.c .
$ gcc -g -o foo foo.c -I$HOME/.vmodules/prantlf/yaml/libyaml/include $HOME/.vmodules/prantlf/yaml/libyaml/src/*.c $HOME/src/v/thirdparty/tcc/lib/libgc.a
Here is the result of transpiling the two functions above:
VV_LOC _result_prantlf__jany__Any prantlf__yaml__parse_object (yaml_parser_t * parser ) {
Array_u8 * buf = HEAP (Array_u8 , (builtin____new_array_with_default_noscan (((int )(sizeof (yaml_event_t ))), 0 , sizeof (u8 ), 0 )));
yaml_event_t * event = ((yaml_event_t * )(& (* (buf ))));
Map_string_prantlf__jany__Any object = builtin__new_map (sizeof (string ), sizeof (prantlf__jany__Any ), & builtin__map_hash_string , & builtin__map_eq_string , & builtin__map_clone_string , & builtin__map_free_string )
;
bool is_key = false;
string key = _S ("" );
for (;;) {
// [ BREAKPOINT 1 ] on the next line
if (yaml_parser_parse (parser , event ) == 0 ) {
return (_result_prantlf__jany__Any ){ .is_error = true, .err = I_prantlf__yaml__YamlError_to_Interface_IError (HEAP (prantlf__yaml__YamlError , (prantlf__yaml__fail_parse (parser )))), .data = {E_STRUCT } };
}
if (event -> type == (YAML_MAPPING_START_EVENT )) {
if (is_key ) {
_result_prantlf__jany__Any _t2 = prantlf__yaml__parse_object (parser );
if (_t2 .is_error ) {
{ // defer begin
yaml_event_delete (event );
} // defer end
_result_prantlf__jany__Any _t3 = {0 };
_t3 .is_error = true;
_t3 .err = _t2 .err ;
return _t3 ;
}
builtin__map_set (& object , & (string []){key }, & (prantlf__jany__Any []) { (* (prantlf__jany__Any * )_t2 .data ) });
is_key = false;
} else {
_result_prantlf__jany__Any _t4 = (_result_prantlf__jany__Any ){ .is_error = true, .err = I_prantlf__yaml__YamlError_to_Interface_IError (HEAP (prantlf__yaml__YamlError , (prantlf__yaml__fail_decode (_S ("key expected" ), event )))), .data = {E_STRUCT } };
{ // defer begin
yaml_event_delete (event );
} // defer end
return _t4 ;
}
}
else if (event -> type == (YAML_SEQUENCE_START_EVENT )) {
if (is_key ) {
_result_prantlf__jany__Any _t5 = prantlf__yaml__parse_array (parser );
if (_t5 .is_error ) {
{ // defer begin
yaml_event_delete (event );
} // defer end
_result_prantlf__jany__Any _t6 = {0 };
_t6 .is_error = true;
_t6 .err = _t5 .err ;
return _t6 ;
}
builtin__map_set (& object , & (string []){key }, & (prantlf__jany__Any []) { (* (prantlf__jany__Any * )_t5 .data ) });
is_key = false;
} else {
_result_prantlf__jany__Any _t7 = (_result_prantlf__jany__Any ){ .is_error = true, .err = I_prantlf__yaml__YamlError_to_Interface_IError (HEAP (prantlf__yaml__YamlError , (prantlf__yaml__fail_decode (_S ("key expected" ), event )))), .data = {E_STRUCT } };
{ // defer begin
yaml_event_delete (event );
} // defer end
return _t7 ;
}
}
else if (event -> type == (YAML_SCALAR_EVENT )) {
if (is_key ) {
_result_prantlf__jany__Any _t8 = prantlf__yaml__parse_value (parser , event );
if (_t8 .is_error ) {
{ // defer begin
yaml_event_delete (event );
} // defer end
_result_prantlf__jany__Any _t9 = {0 };
_t9 .is_error = true;
_t9 .err = _t8 .err ;
return _t9 ;
}
builtin__map_set (& object , & (string []){key }, & (prantlf__jany__Any []) { (* (prantlf__jany__Any * )_t8 .data ) });
is_key = false;
} else {
_result_string _t10 = prantlf__yaml__parse_string (parser , event );
if (_t10 .is_error ) {
{ // defer begin
yaml_event_delete (event );
} // defer end
_result_prantlf__jany__Any _t11 = {0 };
_t11 .is_error = true;
_t11 .err = _t10 .err ;
return _t11 ;
}
key = (* (string * )_t10 .data );
is_key = true;
}
}
else if (event -> type == (YAML_MAPPING_END_EVENT )) {
_result_prantlf__jany__Any _t12 ;
builtin___result_ok (& (prantlf__jany__Any []) { Map_string_prantlf__jany__Any_to_sumtype_prantlf__jany__Any (& object , false) }, (_result * )(& _t12 ), sizeof (prantlf__jany__Any ));
{ // defer begin
yaml_event_delete (event );
} // defer end
return _t12 ;
}
else if (event -> type == (YAML_NO_EVENT ) || event -> type == (YAML_STREAM_START_EVENT ) || event -> type == (YAML_STREAM_END_EVENT ) || event -> type == (YAML_DOCUMENT_START_EVENT ) || event -> type == (YAML_DOCUMENT_END_EVENT )) {
_result_prantlf__jany__Any _t13 = (_result_prantlf__jany__Any ){ .is_error = true, .err = I_prantlf__yaml__YamlError_to_Interface_IError (HEAP (prantlf__yaml__YamlError , (prantlf__yaml__fail_decode (builtin__str_intp (2 , _MOV ((StrIntpData []){{_S ("unexpected yaml event " ), 0xfe07 , {.d_i32 = event -> type }}, {_SLIT0 , 0 , { .d_c = 0 }}})), event )))), .data = {E_STRUCT } };
{ // defer begin
yaml_event_delete (event );
} // defer end
return _t13 ;
}
else {
_result_prantlf__jany__Any _t14 = (_result_prantlf__jany__Any ){ .is_error = true, .err = I_prantlf__yaml__YamlError_to_Interface_IError (HEAP (prantlf__yaml__YamlError , (prantlf__yaml__fail_decode (builtin__str_intp (2 , _MOV ((StrIntpData []){{_S ("unrecognised yaml event " ), 0xfe07 , {.d_i32 = event -> type }}, {_SLIT0 , 0 , { .d_c = 0 }}})), event )))), .data = {E_STRUCT } };
{ // defer begin
yaml_event_delete (event );
} // defer end
return _t14 ;
}
{ // defer begin
yaml_event_delete (event );
} // defer end
}
builtin___v_panic (_S ("unreachable code" ));
VUNREACHABLE ();
return (_result_prantlf__jany__Any ){0 };
}
VV_LOC _result_prantlf__jany__Any prantlf__yaml__parse_array (yaml_parser_t * parser ) {
Array_u8 * buf = HEAP (Array_u8 , (builtin____new_array_with_default_noscan (((int )(sizeof (yaml_event_t ))), 0 , sizeof (u8 ), 0 )));
yaml_event_t * event = ((yaml_event_t * )(& (* (buf ))));
Array_prantlf__jany__Any __v_array = builtin____new_array_with_default (0 , 0 , sizeof (prantlf__jany__Any ), 0 );
for (;;) {
if (yaml_parser_parse (parser , event ) == 0 ) {
return (_result_prantlf__jany__Any ){ .is_error = true, .err = I_prantlf__yaml__YamlError_to_Interface_IError (HEAP (prantlf__yaml__YamlError , (prantlf__yaml__fail_parse (parser )))), .data = {E_STRUCT } };
}
if (event -> type == (YAML_MAPPING_START_EVENT )) {
_result_prantlf__jany__Any _t3 = prantlf__yaml__parse_object (parser );
if (_t3 .is_error ) {
{ // defer begin
yaml_event_delete (event );
} // defer end
_result_prantlf__jany__Any _t4 = {0 };
_t4 .is_error = true;
_t4 .err = _t3 .err ;
return _t4 ;
}
// [ BREAKPOINT 2 ] on the next line
builtin__array_push ((array * )& __v_array , _MOV ((prantlf__jany__Any []){ (* (prantlf__jany__Any * )_t3 .data ) }));
}
else if (event -> type == (YAML_SEQUENCE_START_EVENT )) {
_result_prantlf__jany__Any _t6 = prantlf__yaml__parse_array (parser );
if (_t6 .is_error ) {
{ // defer begin
yaml_event_delete (event );
} // defer end
_result_prantlf__jany__Any _t7 = {0 };
_t7 .is_error = true;
_t7 .err = _t6 .err ;
return _t7 ;
}
builtin__array_push ((array * )& __v_array , _MOV ((prantlf__jany__Any []){ (* (prantlf__jany__Any * )_t6 .data ) }));
}
else if (event -> type == (YAML_SCALAR_EVENT )) {
_result_prantlf__jany__Any _t9 = prantlf__yaml__parse_value (parser , event );
if (_t9 .is_error ) {
{ // defer begin
yaml_event_delete (event );
} // defer end
_result_prantlf__jany__Any _t10 = {0 };
_t10 .is_error = true;
_t10 .err = _t9 .err ;
return _t10 ;
}
builtin__array_push ((array * )& __v_array , _MOV ((prantlf__jany__Any []){ (* (prantlf__jany__Any * )_t9 .data ) }));
}
else if (event -> type == (YAML_SEQUENCE_END_EVENT )) {
_result_prantlf__jany__Any _t11 ;
builtin___result_ok (& (prantlf__jany__Any []) { Array_prantlf__jany__Any_to_sumtype_prantlf__jany__Any (& __v_array , false) }, (_result * )(& _t11 ), sizeof (prantlf__jany__Any ));
{ // defer begin
yaml_event_delete (event );
} // defer end
return _t11 ;
}
else if (event -> type == (YAML_NO_EVENT ) || event -> type == (YAML_STREAM_START_EVENT ) || event -> type == (YAML_STREAM_END_EVENT ) || event -> type == (YAML_DOCUMENT_START_EVENT ) || event -> type == (YAML_DOCUMENT_END_EVENT )) {
_result_prantlf__jany__Any _t12 = (_result_prantlf__jany__Any ){ .is_error = true, .err = I_prantlf__yaml__YamlError_to_Interface_IError (HEAP (prantlf__yaml__YamlError , (prantlf__yaml__fail_decode (builtin__str_intp (2 , _MOV ((StrIntpData []){{_S ("unexpected yaml event " ), 0xfe07 , {.d_i32 = event -> type }}, {_SLIT0 , 0 , { .d_c = 0 }}})), event )))), .data = {E_STRUCT } };
{ // defer begin
yaml_event_delete (event );
} // defer end
return _t12 ;
}
else {
_result_prantlf__jany__Any _t13 = (_result_prantlf__jany__Any ){ .is_error = true, .err = I_prantlf__yaml__YamlError_to_Interface_IError (HEAP (prantlf__yaml__YamlError , (prantlf__yaml__fail_decode (builtin__str_intp (2 , _MOV ((StrIntpData []){{_S ("unrecognised yaml event " ), 0xfe07 , {.d_i32 = event -> type }}, {_SLIT0 , 0 , { .d_c = 0 }}})), event )))), .data = {E_STRUCT } };
{ // defer begin
yaml_event_delete (event );
} // defer end
return _t13 ;
}
{ // defer begin
yaml_event_delete (event );
} // defer end
}
builtin___v_panic (_S ("unreachable code" ));
VUNREACHABLE ();
return (_result_prantlf__jany__Any ){0 };
}
I've commented out the two lines where I set breakpoints. Next, I launched debug and pressed F5 several times — I needed to get to BREAKPOINT 1 after I'd visited BREAKPOINT 2 once. Below are the commands I entered into the debug console and their results:
(lldb) print event
(yaml_event_t *) 0x00007ffff7d7c600
(lldb) print sizeof(*event)
(unsigned long) 104
(lldb) frame select 1
(lldb) print __v_array.data
(voidptr) 0x00007ffff7d7c630
(lldb) frame select 0
(lldb) memory read -s 8 -f x 0x00007ffff7d7c630
0x7ffff7d7c630: 0x00007ffff7d5fc80 0x000000000000008c
0x7ffff7d7c640: 0x0000000000000000 0x0000000000000000
0x7ffff7d7c650: 0x0000000000000000 0x0000000000000000
0x7ffff7d7c660: 0x0000000000000000 0x0000000000000000
(lldb) n
(lldb) memory read -s 8 -f x 0x00007ffff7d7c630
0x7ffff7d7c630: 0x0000000000000001 0x0000000000000013
0x7ffff7d7c640: 0x0000000000000002 0x0000000000000002
0x7ffff7d7c650: 0x0000000000000016 0x0000000000000002
0x7ffff7d7c660: 0x0000000000000005 0x0000000000000000
As you can see, the space between the start of the memory allocated for the event structure and the start of the memory allocated for the array array is only 0x30 (= 48 bytes) , while the event structure takes up 104 bytes .
How could this happen? After all, Boehm GC handles all memory allocations in the program.
Ultimately, I don't know where the error lies: in the V compiler, in the Boehm GC, or in how the code generated by V interacts with Boehm GC. But I don't see any reason for this behavior in the prantlf.yaml package code. Also, the code generated by V looks quite valid, to my eye.
In any case, such automatic memory management bugs are unjustifiable and must be fixed.
V version
V 0.5.1 4dc97d9
Environment details (OS name and version, etc.)
Arch Linux 6.19.8-arch1-1
Note
You can use the 👍 reaction to increase the issue's priority for developers.
Please note that only the 👍 reaction to the issue itself counts as a vote.
Other reactions and those to comments will not be taken into account.
Describe the bug
I wanted to write a program that required a YAML parser, so I decided to use prantlf.yaml. This repository hasn't been updated in a while, but the code looks pretty valid, except for the directives with
@VROOTinstead of@VMODROOT(I already submitted a PR with a fix, but @prantlf hasn't responded yet).An old issue that hadn't been answered caught my attention. I decided to investigate the bug myself and submit another PR, but eventually, I debugged it and discovered that the problem most likely lies outside the
prantlf.yamlpackage, although it only occurs in code that's complex enough that I can't reproduce it with a more minimalistic example.Please be careful and patient while reading the following.
Reproduction Steps
Install the
prantlf.yamlpackage, manually change all references to@VROOTto@VMODROOTin theprantlf/yaml/src/yaml.c.vfile, and then run the following code:Expected Behavior
Current Behavior
Possible Solution
No response
Additional Information/Context
The YAML code from the example above is an object (or mapping in YAML terms), which contains one array (or sequence in YAML terms), which contains two more objects, each with a single field with the values 1 and 2, respectively.
The following two functions are responsible for parsing objects and arrays in
prantlf.yaml:In the loop for parsing the
answerarray (in theparse_arrayfunction), the parser receives twoYAML_MAPPING_START_EVENTevents, and theparse_objectfunction is called for each. The error occurs because the first element of thearrayis reset to zeroes during the call toparse_objectfor the second object ({ans:2}). Why would this happen if neither the object nor any memory associated with it is passed to theparse_objectfunction?Debugging this further, I discovered that the zeroing occurs during the external call
C.yaml_parser_parse(parser, event), which simply fills theeventstructure passed in the second argument with data. Strange, right? I assumed that the memory allocated for this structure somehow overlaps with thearray, and I was right.I ran the following commands to get the C code and debug it:
Here is the result of transpiling the two functions above:
I've commented out the two lines where I set breakpoints. Next, I launched debug and pressed F5 several times — I needed to get to
BREAKPOINT 1after I'd visitedBREAKPOINT 2once. Below are the commands I entered into the debug console and their results:As you can see, the space between the start of the memory allocated for the
eventstructure and the start of the memory allocated for thearrayarray is only 0x30 (= 48 bytes), while theeventstructure takes up 104 bytes.How could this happen? After all, Boehm GC handles all memory allocations in the program.
Ultimately, I don't know where the error lies: in the V compiler, in the Boehm GC, or in how the code generated by V interacts with Boehm GC. But I don't see any reason for this behavior in the
prantlf.yamlpackage code. Also, the code generated by V looks quite valid, to my eye.In any case, such automatic memory management bugs are unjustifiable and must be fixed.
V version
V 0.5.1 4dc97d9
Environment details (OS name and version, etc.)
Arch Linux 6.19.8-arch1-1
Note
You can use the 👍 reaction to increase the issue's priority for developers.
Please note that only the 👍 reaction to the issue itself counts as a vote.
Other reactions and those to comments will not be taken into account.