Have JSON line-comment tokens exclude the "\n"
Before this commit, line-comment token chains included the final "\n"
the way block-comments included the final "*/". The "\n" was mandatory.
However, when parsing with both QUIRK_ALLOW_COMMENT_LINE and
QUIRK_ALLOW_TRAILING_FILLER, input like "123//xyz" (without a "\n"
before the EOF) was rejected before and accepted after this commit.
So that line-comments are consistent — either always or never including
the final "\n" — they now never include the final "\n".
An input fragment (parsed with QUIRK_ALLOW_COMMENT_LINE) like
"//abc\n\t\t" was, before this commit, parsed as a 6-byte comment token
and a 2-byte other-filler token. That split is now 5+3.
diff --git a/example/jsonptr/jsonptr.cc b/example/jsonptr/jsonptr.cc
index 64e55ba..3d72c58 100644
--- a/example/jsonptr/jsonptr.cc
+++ b/example/jsonptr/jsonptr.cc
@@ -1074,7 +1074,7 @@
return write_dst_slow(s, n);
}
-#define TRY_INDENT_WITH_LEADING_NEW_LINE \
+#define TRY_INDENT \
do { \
uint32_t adj = (g_num_input_blank_lines > 1) ? 1 : 0; \
g_num_input_blank_lines = 0; \
@@ -1086,20 +1086,6 @@
} \
} while (false)
-// TRY_INDENT_SANS_LEADING_NEW_LINE is used after comments, which print their
-// own "\n".
-#define TRY_INDENT_SANS_LEADING_NEW_LINE \
- do { \
- uint32_t adj = (g_num_input_blank_lines > 1) ? 1 : 0; \
- g_num_input_blank_lines = 0; \
- uint32_t indent = g_depth * g_bytes_per_indent_depth; \
- TRY(write_dst(g_two_new_lines_then_256_indent_bytes + 2 - adj, \
- adj + (indent & 0xFF))); \
- for (indent >>= 8; indent > 0; indent--) { \
- TRY(write_dst(g_two_new_lines_then_256_indent_bytes + 2, 0x100)); \
- } \
- } while (false)
-
// ----
const char* //
@@ -1155,12 +1141,12 @@
(g_ctx != context::in_dict_after_brace) &&
!g_flags.compact_output) {
if (g_is_after_comment) {
- TRY_INDENT_SANS_LEADING_NEW_LINE;
+ TRY_INDENT;
} else {
if (g_flags.output_extra_comma) {
TRY(write_dst(",", 1));
}
- TRY_INDENT_WITH_LEADING_NEW_LINE;
+ TRY_INDENT;
}
} else {
g_num_input_blank_lines = 0;
@@ -1181,7 +1167,7 @@
// continuation of a multi-token chain.
if (start_of_token_chain) {
if (g_is_after_comment) {
- TRY_INDENT_SANS_LEADING_NEW_LINE;
+ TRY_INDENT;
} else if (g_ctx == context::in_dict_after_key) {
TRY(write_dst(": ", g_flags.compact_output ? 1 : 2));
} else if (g_ctx != context::none) {
@@ -1190,7 +1176,7 @@
TRY(write_dst(",", 1));
}
if (!g_flags.compact_output) {
- TRY_INDENT_WITH_LEADING_NEW_LINE;
+ TRY_INDENT;
}
}
@@ -1336,11 +1322,16 @@
if (g_flags.compact_output) {
TRY(write_dst(g_src.data.ptr + g_cursor_index - token_length,
token_length));
+ if (!t.continued() &&
+ (t.value_base_detail() &
+ WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_LINE)) {
+ TRY(write_dst("\n", 1));
+ }
} else {
if (start_of_token_chain) {
if (g_is_after_comment) {
- TRY_INDENT_SANS_LEADING_NEW_LINE;
+ TRY_INDENT;
} else if (g_ctx != context::none) {
if (g_ctx == context::in_dict_after_key) {
TRY(write_dst(":", 1));
@@ -1349,16 +1340,11 @@
(g_ctx != context::end_of_data)) {
TRY(write_dst(",", 1));
}
- TRY_INDENT_WITH_LEADING_NEW_LINE;
+ TRY_INDENT;
}
}
TRY(write_dst(g_src.data.ptr + g_cursor_index - token_length,
token_length));
- if (!t.continued() &&
- (t.value_base_detail() &
- WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_BLOCK)) {
- TRY(write_dst("\n", 1));
- }
g_is_after_comment = true;
}
if (g_ctx == context::in_list_after_bracket) {
@@ -1366,11 +1352,7 @@
} else if (g_ctx == context::in_dict_after_brace) {
g_ctx = context::in_dict_after_value;
}
- g_num_input_blank_lines =
- (t.value_base_detail() &
- WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_LINE)
- ? 1
- : 0;
+ g_num_input_blank_lines = 0;
} else {
uint8_t* p = g_src.data.ptr + g_cursor_index - token_length;
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index bd10aba..11d5e6d 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -33096,16 +33096,19 @@
}
while (true) {
if (((uint64_t)(io2_a_src - iop_a_src)) <= 0) {
- if (v_length > 0) {
+ if (a_src && a_src->meta.closed) {
+ *iop_a_dst++ = wuffs_base__make_token(
+ (((uint64_t)(4)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
+ (((uint64_t)(v_length)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ self->private_impl.f_comment_type = 2;
+ status = wuffs_base__make_status(NULL);
+ goto ok;
+ } else if (v_length > 0) {
*iop_a_dst++ = wuffs_base__make_token(
(((uint64_t)(4)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
(((uint64_t)(1)) << WUFFS_BASE__TOKEN__CONTINUED__SHIFT) |
(((uint64_t)(v_length)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
}
- if (a_src && a_src->meta.closed) {
- status = wuffs_base__make_status(wuffs_json__error__bad_input);
- goto exit;
- }
status = wuffs_base__make_status(wuffs_base__suspension__short_read);
WUFFS_BASE__COROUTINE_SUSPENSION_POINT_MAYBE_SUSPEND(6);
v_length = 0;
@@ -33113,10 +33116,9 @@
}
v_c = wuffs_base__peek_u8be__no_bounds_check(iop_a_src);
if (v_c == 10) {
- iop_a_src += 1;
*iop_a_dst++ = wuffs_base__make_token(
(((uint64_t)(4)) << WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) |
- (((uint64_t)((v_length + 1))) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
+ (((uint64_t)(v_length)) << WUFFS_BASE__TOKEN__LENGTH__SHIFT));
self->private_impl.f_comment_type = 2;
status = wuffs_base__make_status(NULL);
goto ok;
diff --git a/std/json/decode_json.wuffs b/std/json/decode_json.wuffs
index bd83ae8..7575633 100644
--- a/std/json/decode_json.wuffs
+++ b/std/json/decode_json.wuffs
@@ -1530,7 +1530,16 @@
pre args.dst.length() > 0,
{
if args.src.length() <= 0 {
- if length > 0 {
+ if args.src.is_closed() {
+ args.dst.write_simple_token_fast!(
+ value_major: 0,
+ value_minor: (base.TOKEN__VBC__FILLER << 21) |
+ base.TOKEN__VBD__FILLER__COMMENT_LINE,
+ continued: 0,
+ length: length)
+ this.comment_type = 2
+ return ok
+ } else if length > 0 {
args.dst.write_simple_token_fast!(
value_major: 0,
value_minor: (base.TOKEN__VBC__FILLER << 21) |
@@ -1538,9 +1547,6 @@
continued: 1,
length: length)
}
- if args.src.is_closed() {
- return "#bad input"
- }
yield? base."$short read"
length = 0
continue.comment_line
@@ -1548,13 +1554,12 @@
c = args.src.peek_u8()
if c == '\n' {
- args.src.skip_u32_fast!(actual: 1, worst_case: 1)
args.dst.write_simple_token_fast!(
value_major: 0,
value_minor: (base.TOKEN__VBC__FILLER << 21) |
base.TOKEN__VBD__FILLER__COMMENT_LINE,
continued: 0,
- length: length + 1)
+ length: length)
this.comment_type = 2
return ok
}
diff --git a/std/json/decode_quirks.wuffs b/std/json/decode_quirks.wuffs
index f403033..95c29fc 100644
--- a/std/json/decode_quirks.wuffs
+++ b/std/json/decode_quirks.wuffs
@@ -109,15 +109,17 @@
// When this quirk is enabled, "// C/C++ style line comments\n" are accepted
// anywhere whitespace would be. See also QUIRK_ALLOW_TRAILING_FILLER.
//
-// A line comment may not omit the ending "\n", even if there is no input
-// afterwards (i.e. the prospective line comment ends with the end-of-file).
+// Line comments are terminated by a '\n' byte. '\r' bytes are irrelevant. When
+// combined with QUIRK_ALLOW_TRAILING_FILLER, a line comment at the end of the
+// file may omit the final '\n' byte.
//
// They produce WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_LINE tokens. The token
-// chain's source bytes includes the starting "//" and the ending "\n".
+// chain's source bytes includes the starting "//" but always excludes the
+// ending "\n", regardless of whether it met the end of the file instead.
//
// Even if the line comments are on consecutive lines, each line comment is a
-// separate token chain. There may be whitespace tokens between one line
-// comment's ending "\n" and the next one's starting "//".
+// separate token chain. Between them is at least one "\n" and maybe other
+// whitespace, which are not part of a line comment token chain.
//
// To avoid ambiguity (as comments can contain new lines), this quirk cannot be
// combined with QUIRK_EXPECT_TRAILING_NEW_LINE_OR_EOF.
diff --git a/test/c/std/json.c b/test/c/std/json.c
index 7e9812c..1d76f9a 100644
--- a/test/c/std/json.c
+++ b/test/c/std/json.c
@@ -3451,7 +3451,7 @@
"306 /*foo*/ \n", //
"307 /*foo*/ \n\n", //
"308/*bar\nbaz*/\n\n", //
- "309 // qux\n", // TODO: drop the "\n".
+ "309 // qux", //
"310 // qux\n", //
"311 // qux\n\n", //
"312 /*c0*/ /*c1*/\n\n", //
diff --git a/test/data/json-quirks.tokens b/test/data/json-quirks.tokens
index d7b08ab..9ffd0f8 100644
--- a/test/data/json-quirks.tokens
+++ b/test/data/json-quirks.tokens
Binary files differ