Skip to content

Commit 3d68f77

Browse files
committed
[vnotex] support inline formula
1 parent ce140dd commit 3d68f77

10 files changed

Lines changed: 305 additions & 10 deletions

File tree

parser_test/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ set(TEST_TARGETS
33
strikethrough_tests
44
image_tests
55
block_quote_tests
6-
mark_tests)
6+
mark_tests
7+
formula_inline_tests
8+
code_tests)
79

810
foreach(TARGET ${TEST_TARGETS})
911
add_executable(${TARGET} ${TARGET}.c test_utils.c)

parser_test/code_tests.c

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#include <cmark.h>
2+
3+
#include "test_utils.h"
4+
5+
int test_code_simple() {
6+
return test_xml("`code`",
7+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
8+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
9+
"<document sourcepos=\"1:1-1:6\" xmlns=\"http://commonmark.org/xml/1.0\">\n"
10+
" <paragraph sourcepos=\"1:1-1:6\">\n"
11+
" <code sourcepos=\"1:1-1:6\" xml:space=\"preserve\">code</code>\n"
12+
" </paragraph>\n"
13+
"</document>\n",
14+
CMARK_OPT_SOURCEPOS);
15+
}
16+
17+
int test_code_multiple() {
18+
return test_xml("`first` and `second`",
19+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
20+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
21+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
22+
" <paragraph>\n"
23+
" <code xml:space=\"preserve\">first</code>\n"
24+
" <text xml:space=\"preserve\"> and </text>\n"
25+
" <code xml:space=\"preserve\">second</code>\n"
26+
" </paragraph>\n"
27+
"</document>\n",
28+
CMARK_OPT_DEFAULT);
29+
}
30+
31+
int test_code_with_spaces() {
32+
return test_xml("` code with spaces `",
33+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
34+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
35+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
36+
" <paragraph>\n"
37+
" <code xml:space=\"preserve\"> code with spaces </code>\n"
38+
" </paragraph>\n"
39+
"</document>\n",
40+
CMARK_OPT_DEFAULT);
41+
}
42+
43+
int test_code_double_backticks() {
44+
return test_xml("``code with `backticks` inside``",
45+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
46+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
47+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
48+
" <paragraph>\n"
49+
" <code xml:space=\"preserve\">code with `backticks` inside</code>\n"
50+
" </paragraph>\n"
51+
"</document>\n",
52+
CMARK_OPT_DEFAULT);
53+
}
54+
55+
int test_code_not_closed() {
56+
return test_xml("`not closed",
57+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
58+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
59+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
60+
" <paragraph>\n"
61+
" <text xml:space=\"preserve\">`not closed</text>\n"
62+
" </paragraph>\n"
63+
"</document>\n",
64+
CMARK_OPT_DEFAULT);
65+
}
66+
67+
int test_code_empty() {
68+
return test_xml("``",
69+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
70+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
71+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
72+
" <paragraph>\n"
73+
" <text xml:space=\"preserve\">``</text>\n"
74+
" </paragraph>\n"
75+
"</document>\n",
76+
CMARK_OPT_DEFAULT);
77+
}
78+
79+
// Test driver for inline code spans. Each test is handed to the CASE macro
// (defined outside this file — presumably in test_utils.h; its failure
// handling is not visible here). Returns 0 once every CASE line has run.
int main() {
  // Well-formed code spans.
  CASE(test_code_simple);
  CASE(test_code_multiple);
  CASE(test_code_with_spaces);
  CASE(test_code_double_backticks);

  // Inputs that must fall back to plain text.
  CASE(test_code_not_closed);
  CASE(test_code_empty);

  return 0;
}

parser_test/formula_inline_tests.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#include <cmark.h>
2+
3+
#include "test_utils.h"
4+
5+
int test_formula_inline_simple() {
6+
return test_xml("$E=mc^2$",
7+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
8+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
9+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
10+
" <paragraph>\n"
11+
" <formula_inline xml:space=\"preserve\">E=mc^2</formula_inline>\n"
12+
" </paragraph>\n"
13+
"</document>\n",
14+
CMARK_OPT_DEFAULT);
15+
}
16+
17+
int test_formula_inline_multiple() {
18+
return test_xml("$a+b$ and $c+d$",
19+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
20+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
21+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
22+
" <paragraph>\n"
23+
" <formula_inline xml:space=\"preserve\">a+b</formula_inline>\n"
24+
" <text xml:space=\"preserve\"> and </text>\n"
25+
" <formula_inline xml:space=\"preserve\">c+d</formula_inline>\n"
26+
" </paragraph>\n"
27+
"</document>\n",
28+
CMARK_OPT_DEFAULT);
29+
}
30+
31+
int test_formula_inline_with_escape() {
32+
return test_xml("$a\\$b$",
33+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
34+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
35+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
36+
" <paragraph>\n"
37+
" <formula_inline xml:space=\"preserve\">a$b</formula_inline>\n"
38+
" </paragraph>\n"
39+
"</document>\n",
40+
CMARK_OPT_DEFAULT);
41+
}
42+
43+
int test_formula_inline_not_closed() {
44+
return test_xml("$formula",
45+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
46+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
47+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
48+
" <paragraph>\n"
49+
" <text xml:space=\"preserve\">$formula</text>\n"
50+
" </paragraph>\n"
51+
"</document>\n",
52+
CMARK_OPT_DEFAULT);
53+
}
54+
55+
int test_formula_inline_empty() {
56+
return test_xml("$$",
57+
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
58+
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
59+
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
60+
" <paragraph>\n"
61+
" <text xml:space=\"preserve\">$$</text>\n"
62+
" </paragraph>\n"
63+
"</document>\n",
64+
CMARK_OPT_DEFAULT);
65+
}
66+
67+
// Test driver for inline formulas. Each test is handed to the CASE macro
// (defined outside this file — presumably in test_utils.h; its failure
// handling is not visible here). Returns 0 once every CASE line has run.
int main() {
  // Well-formed formulas.
  CASE(test_formula_inline_simple);
  CASE(test_formula_inline_multiple);
  CASE(test_formula_inline_with_escape);

  // Inputs that must fall back to plain text.
  CASE(test_formula_inline_not_closed);
  CASE(test_formula_inline_empty);

  return 0;
}

parser_test/image_tests.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ static int test_basic_image() {
55
const char *markdown = "![Alt text](image.png)\n";
66
const char *expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
77
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
8-
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
9-
" <paragraph>\n"
10-
" <image destination=\"image.png\">\n"
11-
" <text xml:space=\"preserve\">Alt text</text>\n"
8+
"<document sourcepos=\"1:1-1:22\" xmlns=\"http://commonmark.org/xml/1.0\">\n"
9+
" <paragraph sourcepos=\"1:1-1:22\">\n"
10+
" <image sourcepos=\"1:1-1:22\" destination=\"image.png\">\n"
11+
" <text sourcepos=\"1:3-1:10\" xml:space=\"preserve\">Alt text</text>\n"
1212
" </image>\n"
1313
" </paragraph>\n"
1414
"</document>\n";
15-
return test_xml(markdown, expected, CMARK_OPT_DEFAULT);
15+
return test_xml(markdown, expected, CMARK_OPT_SOURCEPOS);
1616
}
1717

1818
static int test_image_with_title() {

src/buffer.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,17 @@ void cmark_strbuf_unescape(cmark_strbuf *buf) {
207207

208208
cmark_strbuf_truncate(buf, w);
209209
}
210+
211+
// Destructively unescape a string: remove backslashes before @c.
212+
void cmark_strbuf_unescape_char(cmark_strbuf *buf, char c) {
213+
bufsize_t r, w;
214+
215+
for (r = 0, w = 0; r < buf->size; ++r) {
216+
if (buf->ptr[r] == '\\' && buf->ptr[r + 1] == c)
217+
r++;
218+
219+
buf->ptr[w++] = buf->ptr[r];
220+
}
221+
222+
cmark_strbuf_truncate(buf, w);
223+
}

src/buffer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ void cmark_strbuf_rtrim(cmark_strbuf *buf);
6666
void cmark_strbuf_trim(cmark_strbuf *buf);
6767
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
6868
void cmark_strbuf_unescape(cmark_strbuf *s);
69+
// Destructively unescape the character @c in @s: remove each backslash that directly precedes @c.
70+
void cmark_strbuf_unescape_char(cmark_strbuf *s, char c);
6971

7072
#ifdef __cplusplus
7173
}

src/cmark.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ typedef enum {
6161
CMARK_NODE_STRONG,
6262
CMARK_NODE_STRIKETHROUGH,
6363
CMARK_NODE_MARK,
64+
CMARK_NODE_FORMULA_INLINE,
6465
CMARK_NODE_LINK,
6566
CMARK_NODE_IMAGE,
6667

@@ -691,6 +692,9 @@ const char *cmark_version_string(void);
691692
#define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE
692693
#define NODE_EMPH CMARK_NODE_EMPH
693694
#define NODE_STRONG CMARK_NODE_STRONG
695+
#define NODE_STRIKETHROUGH CMARK_NODE_STRIKETHROUGH
696+
#define NODE_MARK CMARK_NODE_MARK
697+
#define NODE_FORMULA_INLINE CMARK_NODE_FORMULA_INLINE
694698
#define NODE_LINK CMARK_NODE_LINK
695699
#define NODE_IMAGE CMARK_NODE_IMAGE
696700
#define BULLET_LIST CMARK_BULLET_LIST

src/inlines.c

Lines changed: 107 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,41 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
357357
return 0;
358358
}
359359

360+
361+
// Try to process a dollar inline formula span that began with a
362+
// dollar (already parsed). Return 0 if you don't find matching closing
363+
// dollar, otherwise return the position in the subject
364+
// after the closing dollar.
365+
static bufsize_t scan_to_closing_dollar(subject *subj) {
366+
bufsize_t startpos = subj->pos;
367+
368+
// read non dollar
369+
unsigned char c;
370+
size_t slash_cnt = 0;
371+
// Directly skip the escaped dollar.
372+
while ((c = peek_char(subj)) && (c != '$' || slash_cnt % 2 == 1)) {
373+
if (c == '\\') {
374+
++slash_cnt;
375+
} else {
376+
slash_cnt = 0;
377+
}
378+
if (c == '\r' || c == '\n') {
379+
// Line break is not allowed.
380+
break;
381+
}
382+
advance(subj);
383+
}
384+
if (!is_eof(subj) && c == '$') {
385+
advance(subj);
386+
return (subj->pos);
387+
} else {
388+
// Rewind it.
389+
subj->pos = startpos;
390+
return 0;
391+
}
392+
}
393+
394+
360395
// Destructively modify string, converting newlines to
361396
// spaces, then removing a single leading + trailing space,
362397
// unless the code span consists entirely of space characters.
@@ -412,8 +447,9 @@ static cmark_node *handle_backticks(subject *subj, int options) {
412447
endpos - startpos - openticks.len);
413448
S_normalize_code(&buf);
414449

415-
cmark_node *node = make_literal(subj, CMARK_NODE_CODE, startpos,
416-
endpos - openticks.len - 1);
450+
// VNoteX: let's fix it to include the ticks.
451+
cmark_node *node = make_literal(subj, CMARK_NODE_CODE, startpos - openticks.len,
452+
endpos - 1);
417453
node->len = buf.size;
418454
node->data = cmark_strbuf_detach(&buf);
419455
adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
@@ -422,6 +458,70 @@ static cmark_node *handle_backticks(subject *subj, int options) {
422458
}
423459

424460

461+
// Parse dollar inline formula section or raw dollar, return an inline.
462+
// Assumes that the subject has a dollar at the current position.
463+
static cmark_node *handle_dollar(subject *subj) {
464+
bufsize_t initpos = subj->pos;
465+
// Skip the open dollar.
466+
advance(subj);
467+
bufsize_t startpos = subj->pos;
468+
469+
// Pre check.
470+
if (subj->pos > 1) {
471+
unsigned char before_char = peek_at(subj, subj->pos - 2);
472+
if (before_char == '$' ||
473+
(before_char >= '0' && before_char <= '9') ||
474+
before_char == '\\') {
475+
// Not a legal open dollar.
476+
return make_str(subj, initpos, initpos, cmark_chunk_literal("$"));
477+
}
478+
}
479+
480+
bufsize_t endpos = scan_to_closing_dollar(subj);
481+
if (endpos == 0) {
482+
subj->pos = startpos;
483+
return make_str(subj, initpos, initpos, cmark_chunk_literal("$"));
484+
}
485+
486+
// Post check.
487+
{
488+
// $$ is invalid.
489+
if (endpos - startpos == 1) {
490+
return make_str(subj, initpos, startpos, cmark_chunk_literal("$$"));
491+
}
492+
493+
// No space before the closing dollar.
494+
unsigned char before_char = peek_at(subj, endpos - 2);
495+
if (endpos - startpos == 1 || before_char == ' ' || before_char == '\t') {
496+
// Not a legal closing dollar.
497+
subj->pos = startpos;
498+
return make_str(subj, initpos, initpos, cmark_chunk_literal("$"));
499+
}
500+
501+
// No digit after the closing dollar.
502+
if (endpos < subj->input.len) {
503+
unsigned char after_char = peek_at(subj, endpos);
504+
if (after_char >= '0' && after_char <= '9') {
505+
// Not a legal closing dollar.
506+
subj->pos = startpos;
507+
return make_str(subj, initpos, initpos, cmark_chunk_literal("$"));
508+
}
509+
}
510+
}
511+
512+
cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
513+
514+
cmark_strbuf_set(&buf, subj->input.data + startpos,
515+
endpos - startpos - 1);
516+
cmark_strbuf_unescape_char(&buf, '$');
517+
518+
cmark_node *node = make_literal(subj, CMARK_NODE_FORMULA_INLINE, startpos, endpos - 2);
519+
node->len = buf.size;
520+
node->data = cmark_strbuf_detach(&buf);
521+
return node;
522+
}
523+
524+
425525
// Scan ***, **, or * and return number scanned, or 0.
426526
// Advances position.
427527
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
@@ -1330,10 +1430,10 @@ static cmark_node *handle_newline(subject *subj) {
13301430

13311431
static bufsize_t subject_find_special_char(subject *subj, int options) {
13321432
// "\r\n\\`&_*[]<!"
1333-
// Add '~', '='.
1433+
// Add '~', '=', '$'.
13341434
static const int8_t SPECIAL_CHARS[256] = {
13351435
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1336-
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
1436+
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
13371437
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13381438
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
13391439
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1391,6 +1491,9 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
13911491
case '`':
13921492
new_inl = handle_backticks(subj, options);
13931493
break;
1494+
case '$':
1495+
new_inl = handle_dollar(subj);
1496+
break;
13941497
case '\\':
13951498
new_inl = handle_backslash(subj);
13961499
break;

0 commit comments

Comments
 (0)