diff --git a/EMBEDDING.md b/EMBEDDING.md index 327e923..83d24a1 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -111,8 +111,74 @@ The tool supports an extended glob syntax for matching lines: By default, patterns imply a wildcard (`*`) at both the start and end. Use `^` and `$` to disable this behavior and match the exact line start or end. -If you need to match a literal `^` at the start of a line, use `^^`. -Similarly, use `$$` to match a literal `$` at the end of a line. +#### Multi-line patterns + +Use `\n` inside a `start`, `end`, or `line` pattern to match consecutive source lines. +Spaces around `\n` are ignored, and each pattern line uses the same glob syntax as a +regular one-line pattern. + +````markdown + +```java +``` +```` + +This matches a source range like: + +```java +@Test +@DisplayName("adds two values") +void addsTwoValues() { + int value = 1 + 1; + + assertEquals(2, value); +} +``` + +The `start` pattern above is interpreted as two consecutive line patterns: +`Test` and `adds two values`. Because ordinary patterns imply `*` at both ends, +these match `@Test` and `@DisplayName("adds two values")`. + +Use `^` and `$` on each pattern line when you need exact line matching: + +````markdown + +```java +``` +```` + +Without `\n`, a `start`, `end`, or `line` pattern matches only one source line. + +#### Escaping + +Use a backslash to match glob control characters literally. For example: + +- `\*` matches a literal `*`. +- `\?` matches a literal `?`. +- `\[` matches a literal `[`. + +Since `^` is only special at the start of a pattern, use `^^` to match a literal +`^` there. Since `$` is only special at the end of a pattern, use `$$` to match a +literal `$` there. + +To match literal `\n` text in a source line, write it as `\\n` in the pattern. + +````markdown + +```java +``` +```` + +It's possible to write quote characters in patterns as `\"` instead of the XML entity `&quot;`. 
+For example, `line="println(\"Hello\")"` is equivalent to `line="println(&quot;Hello&quot;)"`. ## Comment filtering diff --git a/embedding/embedding_test.go b/embedding/embedding_test.go index f2857c3..3ae82c4 100644 --- a/embedding/embedding_test.go +++ b/embedding/embedding_test.go @@ -197,6 +197,65 @@ var _ = Describe("Embedding", func() { Expect(processor.IsUpToDate()).Should(BeTrue()) }) + It("should embed a method with escaped newline patterns", func() { + config.DocIncludes = []string{"escaped-newline-pattern.md"} + docPath := fmt.Sprintf("%s/escaped-newline-pattern.md", config.DocumentationRoot) + processor := embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + + docContent, err := os.ReadFile(docPath) + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(docContent)).Should(ContainSubstring("@Test\n" + + "@DisplayName(\"adds two values\")")) + Expect(string(docContent)).Should(ContainSubstring("assertEquals(2, value);\n}")) + Expect(string(docContent)).ShouldNot(ContainSubstring("subtractsTwoValues")) + }) + + It("should embed a method with exact escaped newline patterns", func() { + config.DocIncludes = []string{"escaped-newline-exact-pattern.md"} + docPath := fmt.Sprintf("%s/escaped-newline-exact-pattern.md", config.DocumentationRoot) + processor := embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + + docContent, err := os.ReadFile(docPath) + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(docContent)).Should(ContainSubstring("@Test\n" + + "@DisplayName(\"adds two values\")")) + Expect(string(docContent)).Should(ContainSubstring("assertEquals(2, value);\n}")) + Expect(string(docContent)).ShouldNot(ContainSubstring("subtractsTwoValues")) + }) + + It("should embed matching lines with an escaped newline line pattern", func() { + config.DocIncludes = []string{"escaped-newline-line-pattern.md"} + docPath := 
fmt.Sprintf("%s/escaped-newline-line-pattern.md", config.DocumentationRoot) + processor := embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + + docContent, err := os.ReadFile(docPath) + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(docContent)).Should(ContainSubstring("@Test\n" + + "@DisplayName(\"adds two values\")")) + Expect(string(docContent)).ShouldNot(ContainSubstring("void addsTwoValues")) + Expect(string(docContent)).ShouldNot(ContainSubstring("subtractsTwoValues")) + }) + + It("should embed a line with an escaped newline literal pattern", func() { + config.DocIncludes = []string{"escaped-newline-literal-pattern.md"} + docPath := fmt.Sprintf("%s/escaped-newline-literal-pattern.md", config.DocumentationRoot) + processor := embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + + docContent, err := os.ReadFile(docPath) + Expect(err).ShouldNot(HaveOccurred()) + Expect(string(docContent)).Should(ContainSubstring( + "private static final String MY_STRING = \"\\n\";", + )) + }) + It("should report a missing closing tag", func() { docPath := fmt.Sprintf("%s/missing-closing-tag.md", config.DocumentationRoot) processor := embedding.NewProcessor(docPath, config) diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 8628455..06a384d 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -191,6 +191,18 @@ func (e Instruction) String() string { // lines — a list of strings representing the input lines. 
func (e Instruction) matchingLines(lines []string, codeFileReference string) ([]string, error) { if e.LinePattern != nil { + if e.LinePattern.HasLineSeparator() { + startPosition, endPosition, err := e.matchLineSequence( + e.LinePattern, lines, 0, "line", codeFileReference, + ) + if err != nil { + return nil, err + } + requiredLines := lines[startPosition : endPosition+1] + indentation := indent.MaxCommonIndentation(requiredLines) + + return indent.CutIndent(requiredLines, indentation), nil + } linePosition, err := e.matchGlob( e.LinePattern, lines, 0, "line", codeFileReference, ) @@ -238,19 +250,71 @@ func (e Instruction) matchingLines(lines []string, codeFileReference string) ([] // startFrom — an index from which to start searching. func (e Instruction) matchGlob(pattern *Pattern, lines []string, startFrom int, kind string, codeFileReference string) (int, error) { + if pattern.HasLineSeparator() { + start, end, err := e.matchLineSequence( + pattern, lines, startFrom, kind, codeFileReference, + ) + if err != nil { + return 0, err + } + if kind == "end" { + return end, nil + } + return start, nil + } + if line, found := matchSingleLine(pattern, lines, startFrom); found { + return line, nil + } + return 0, PatternNotFoundError{ + Line: e.DocumentationLine, + CodeFileReference: codeFileReference, + Kind: kind, + Pattern: pattern, + } +} + +// matchSingleLine returns the first source line matching the pattern. +func matchSingleLine(pattern *Pattern, lines []string, startFrom int) (int, bool) { lineCount := len(lines) resultLine := startFrom for resultLine < lineCount { line := lines[resultLine] if pattern.Match(line) { - return resultLine, nil + return resultLine, true } resultLine++ } - return 0, PatternNotFoundError{ + + return 0, false +} + +// matchLineSequence returns the first line range matching the pattern or a not-found error. 
+func (e Instruction) matchLineSequence(pattern *Pattern, lines []string, startFrom int, + kind string, codeFileReference string) (int, int, error) { + start, end, found := matchLineSequence(pattern, lines, startFrom) + if found { + return start, end, nil + } + + return 0, 0, PatternNotFoundError{ Line: e.DocumentationLine, CodeFileReference: codeFileReference, Kind: kind, Pattern: pattern, } } + +// matchLineSequence returns the first source-line range matching an escaped-line pattern. +func matchLineSequence(pattern *Pattern, lines []string, startFrom int) (int, int, bool) { + patterns := pattern.lineSequencePatterns() + lineCount := len(patterns) + lastStart := len(lines) - lineCount + for start := startFrom; start <= lastStart; start++ { + end := start + lineCount + if matchLineSequencePatterns(patterns, lines[start:end]) { + return start, end - 1, true + } + } + + return 0, 0, false +} diff --git a/embedding/parsing/instruction_test.go b/embedding/parsing/instruction_test.go index 0425f98..f89c1a8 100644 --- a/embedding/parsing/instruction_test.go +++ b/embedding/parsing/instruction_test.go @@ -85,6 +85,15 @@ var _ = Describe("Instruction", func() { Expect(parsing.FromXML(xmlString, config)).Error().ShouldNot(HaveOccurred()) }) + It("should parse backslash-escaped quotes in XML attributes", func() { + xmlString := `` + + attributes, err := parsing.ParseXMLLine(xmlString) + + Expect(err).ShouldNot(HaveOccurred()) + Expect(attributes["line"]).Should(Equal(`println("Hello world")`)) + }) + It("should have an error for unsupported comments mode", func() { instructionParams := TestInstructionParams{ comments: "summary", @@ -301,6 +310,59 @@ var _ = Describe("Instruction", func() { })) }) + It("should embed a line with an escaped asterisk pattern", func() { + instructionParams := TestInstructionParams{ + lineGlob: `Use \* to multiply`, + } + + actualLines := getXMLExtractionContent( + "literal-patterns.txt", instructionParams, config) + + 
Expect(actualLines).Should(Equal([]string{ + "Use * to multiply", + })) + }) + + It("should embed a line starting with a literal caret pattern", func() { + instructionParams := TestInstructionParams{ + lineGlob: "^^ starts with caret", + } + + actualLines := getXMLExtractionContent( + "literal-patterns.txt", instructionParams, config) + + Expect(actualLines).Should(Equal([]string{ + "^ starts with caret", + })) + }) + + It("should embed a line ending with a literal dollar pattern", func() { + instructionParams := TestInstructionParams{ + lineGlob: "The value ends with $$", + } + + actualLines := getXMLExtractionContent( + "literal-patterns.txt", instructionParams, config) + + Expect(actualLines).Should(Equal([]string{ + "The value ends with $", + })) + }) + + It("should preserve pattern spaces that are not adjacent to a line separator", func() { + instructionParams := TestInstructionParams{ + lineGlob: "^ padded text $ \\n ^Use \\* to multiply$", + } + + actualLines := getXMLExtractionContent( + "literal-patterns.txt", instructionParams, config) + + Expect(actualLines).Should(Equal([]string{ + " padded text ", + "Use * to multiply", + })) + }) + It("should successfully parse XML by only end glob", func() { instructionParams := TestInstructionParams{ endGlob: "package*", diff --git a/embedding/parsing/pattern.go b/embedding/parsing/pattern.go index 07a6424..fba743c 100644 --- a/embedding/parsing/pattern.go +++ b/embedding/parsing/pattern.go @@ -21,13 +21,16 @@ package parsing import ( "fmt" "strings" + "unicode" + "unicode/utf8" "github.com/gobwas/glob" ) // Pattern represents a glob-like pattern to match a line of a source file. // -// Contains both original glob string and modified pattern suitable for matching. +// Contains both original glob string, modified pattern suitable for matching, +// and a compiled matcher for the modified pattern. // // sourceGlob — a glob-like string, e.g. "*main*" or "^main". 
// @@ -35,12 +38,15 @@ import ( type Pattern struct { sourceGlob string pattern string + matcher glob.Glob } const ( anyCharacterSequence = "*" lineStart = "^" lineEnd = "$" + lineSeparator = `\n` + escapedLineSeparator = `\\n` ) // NewPattern creates a new Pattern based on provided glob string. @@ -51,6 +57,12 @@ const ( // The modified pattern is the original one, but enclosed with the "*" wildcards, // unless start of the line or end of the line wildcards were specified. // +// A multi-line pattern uses "\n" as a separator between consecutive source-line +// patterns. For example, "Test \n adds two values" matches a line matching "Test" +// followed by a line matching "adds two values". Each part separated by "\n" is +// converted to Pattern separately and follows the same wildcard rules. +// Use "\\n" to match literal "\n" text instead of starting the next pattern line. +// // glob — a string that represents a pattern that can include such wildcards: // - "*" — matches any sequence of characters; // - "^" — matches the start of the line; @@ -65,19 +77,19 @@ const ( // p := NewPattern("^.txt") // fmt.Println("Original glob:", p.sourceGlob) // "*.txt" // fmt.Println("Modified pattern:", p.pattern) // ".txt*" -func NewPattern(glob string) Pattern { - pattern := glob +func NewPattern(globString string) Pattern { + pattern := globString - startOfLine := strings.HasPrefix(glob, lineStart) - if !startOfLine && !strings.HasPrefix(glob, anyCharacterSequence) { + startOfLine := strings.HasPrefix(globString, lineStart) + if !startOfLine && !strings.HasPrefix(globString, anyCharacterSequence) { pattern = anyCharacterSequence + pattern } if startOfLine { pattern = pattern[1:] } - endOfLine := strings.HasSuffix(glob, lineEnd) - if !endOfLine && !strings.HasSuffix(glob, anyCharacterSequence) { + endOfLine := strings.HasSuffix(globString, lineEnd) + if !endOfLine && !strings.HasSuffix(globString, anyCharacterSequence) { pattern += anyCharacterSequence } if endOfLine { @@ -86,8 
+98,9 @@ func NewPattern(glob string) Pattern { } return Pattern{ - sourceGlob: glob, + sourceGlob: globString, pattern: pattern, + matcher: glob.MustCompile(pattern), } } @@ -95,9 +108,88 @@ func NewPattern(glob string) Pattern { // // line — a line to check the match for. func (p Pattern) Match(line string) bool { - g := glob.MustCompile(p.pattern) + if p.matcher == nil { + return glob.MustCompile(p.pattern).Match(line) + } + + return p.matcher.Match(line) +} + +// HasLineSeparator reports whether the pattern contains an escaped line separator. +func (p Pattern) HasLineSeparator() bool { + _, hasSeparator := p.linePatterns() + + return hasSeparator +} + +// MatchLineSequence reports whether source lines match the escaped-line-separated pattern. +func (p Pattern) MatchLineSequence(lines []string) bool { + patterns := p.lineSequencePatterns() + + return matchLineSequencePatterns(patterns, lines) +} + +// matchLineSequencePatterns reports whether compiled Patterns match source lines in order. +func matchLineSequencePatterns(patterns []Pattern, lines []string) bool { + if len(patterns) != len(lines) { + return false + } + for i, pattern := range patterns { + if !pattern.Match(lines[i]) { + return false + } + } + + return true +} + +// lineSequencePatterns returns the Patterns for each part of a multi-line pattern. +func (p Pattern) lineSequencePatterns() []Pattern { + patternLines, _ := p.linePatterns() + patterns := make([]Pattern, 0, len(patternLines)) + for _, patternLine := range patternLines { + patterns = append(patterns, NewPattern(patternLine)) + } + + return patterns +} + +// linePatterns returns trimmed pattern lines separated by an escaped newline. 
+func (p Pattern) linePatterns() ([]string, bool) { + var patternLines []string + var line strings.Builder + hasSeparator := false + trimLeft := false + for i := 0; i < len(p.sourceGlob); { + remaining := p.sourceGlob[i:] + switch { + case strings.HasPrefix(remaining, escapedLineSeparator): + line.WriteString(escapedLineSeparator) + i += len(escapedLineSeparator) + case strings.HasPrefix(remaining, lineSeparator): + patternLines = append(patternLines, strings.TrimRightFunc(line.String(), unicode.IsSpace)) + line.Reset() + hasSeparator = true + trimLeft = true + i += len(lineSeparator) + case trimLeft: + r, size := utf8.DecodeRuneInString(remaining) + if !unicode.IsSpace(r) { + trimLeft = false + line.WriteByte(p.sourceGlob[i]) + i++ + continue + } + i += size + default: + trimLeft = false + line.WriteByte(p.sourceGlob[i]) + i++ + } + } + patternLines = append(patternLines, line.String()) - return g.Match(line) + return patternLines, hasSeparator } // Returns string representation of Pattern. diff --git a/embedding/parsing/xml_parse.go b/embedding/parsing/xml_parse.go index 9d4dcda..6716242 100644 --- a/embedding/parsing/xml_parse.go +++ b/embedding/parsing/xml_parse.go @@ -22,6 +22,7 @@ import ( "embed-code/embed-code-go/configuration" "encoding/xml" "fmt" + "strings" ) // Item needed for xml.Unmarshal parsing. The fields are filling up during the parsing. @@ -69,7 +70,7 @@ func FromXML(line string, config configuration.Configuration) (Instruction, erro // Returns a map of key-value pairs. If the provided line is not valid, returns an error. func ParseXMLLine(xmlLine string) (map[string]string, error) { var root Item - err := xml.Unmarshal([]byte(xmlLine), &root) + err := xml.Unmarshal([]byte(quoteEscapedXMLLine(xmlLine)), &root) if err != nil { return map[string]string{}, err } @@ -86,3 +87,8 @@ func ParseXMLLine(xmlLine string) (map[string]string, error) { return attributes, nil } + +// quoteEscapedXMLLine converts backslash-escaped quotes into XML entities. 
+func quoteEscapedXMLLine(xmlLine string) string { + return strings.ReplaceAll(xmlLine, `\"`, "&quot;") +} diff --git a/main.go b/main.go index b08fad5..c09df6e 100644 --- a/main.go +++ b/main.go @@ -28,7 +28,7 @@ import ( ) // Version of the embed-code application. -const Version = "1.2.0" +const Version = "1.2.1" // The entry point for embed-code. // diff --git a/test/resources/code/java/literal-patterns.txt b/test/resources/code/java/literal-patterns.txt new file mode 100644 index 0000000..034ba90 --- /dev/null +++ b/test/resources/code/java/literal-patterns.txt @@ -0,0 +1,5 @@ + padded text +Use * to multiply +The total is $5 +The value ends with $ +^ starts with caret diff --git a/test/resources/code/java/org/example/MultiLinePatternSample.java b/test/resources/code/java/org/example/MultiLinePatternSample.java new file mode 100644 index 0000000..000a4d6 --- /dev/null +++ b/test/resources/code/java/org/example/MultiLinePatternSample.java @@ -0,0 +1,25 @@ +package org.example; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +class MultiLinePatternSample { + + private static final String MY_STRING = "\n"; + + @Test + @DisplayName("adds two values") + void addsTwoValues() { + int value = 1 + 1; + + assertEquals(2, value); + } + + @Test + @DisplayName("subtracts two values") + void subtractsTwoValues() { + int value = 2 - 1; + + assertEquals(1, value); + } +} diff --git a/test/resources/docs/escaped-newline-exact-pattern.md b/test/resources/docs/escaped-newline-exact-pattern.md new file mode 100644 index 0000000..04417e4 --- /dev/null +++ b/test/resources/docs/escaped-newline-exact-pattern.md @@ -0,0 +1,7 @@ +# Escaped-newline exact pattern + + +```java +``` diff --git a/test/resources/docs/escaped-newline-line-pattern.md b/test/resources/docs/escaped-newline-line-pattern.md new file mode 100644 index 0000000..8fd0a33 --- /dev/null +++ b/test/resources/docs/escaped-newline-line-pattern.md @@ -0,0 +1,6 @@ +# Escaped-newline line pattern 
+ + +```java +``` diff --git a/test/resources/docs/escaped-newline-literal-pattern.md b/test/resources/docs/escaped-newline-literal-pattern.md new file mode 100644 index 0000000..4c45ddf --- /dev/null +++ b/test/resources/docs/escaped-newline-literal-pattern.md @@ -0,0 +1,6 @@ +# Escaped-newline literal pattern + + +```java +``` diff --git a/test/resources/docs/escaped-newline-pattern.md b/test/resources/docs/escaped-newline-pattern.md new file mode 100644 index 0000000..01081f6 --- /dev/null +++ b/test/resources/docs/escaped-newline-pattern.md @@ -0,0 +1,7 @@ +# Escaped-newline pattern + + +```java +```