diff --git a/cmd/submit.go b/cmd/submit.go index 45d6097..da4917e 100644 --- a/cmd/submit.go +++ b/cmd/submit.go @@ -4,6 +4,7 @@ package cmd import ( "fmt" "os" + "regexp" "strings" "github.com/boneskull/gh-stack/internal/config" @@ -514,6 +515,10 @@ func promptMarkPRReady(ghClient *github.Client, prNumber int, branch, trunk stri // generatePRBody creates a PR description from the commits between base and head. // For a single commit: returns the commit body. // For multiple commits: returns each commit as a markdown section. +// +// Commit message bodies are unwrapped so that hard line breaks within paragraphs +// (typical of the ~72-column git convention) are removed. This produces better +// rendering in GitHub's PR description, which treats single newlines as
tags. func generatePRBody(g *git.Git, base, head string) (string, error) { commits, err := g.GetCommits(base, head) if err != nil { @@ -526,7 +531,7 @@ func generatePRBody(g *git.Git, base, head string) (string, error) { if len(commits) == 1 { // Single commit: just use the body - return commits[0].Body, nil + return unwrapParagraphs(commits[0].Body), nil } // Multiple commits: format as markdown sections @@ -540,10 +545,204 @@ func generatePRBody(g *git.Git, base, head string) (string, error) { sb.WriteString("\n") if commit.Body != "" { sb.WriteString("\n") - sb.WriteString(commit.Body) + sb.WriteString(unwrapParagraphs(commit.Body)) sb.WriteString("\n") } } return sb.String(), nil } + +// htmlTagRe matches anything that looks like an HTML tag, including custom +// elements with hyphens (e.g. ) and namespaced tags (e.g. ). +var htmlTagRe = regexp.MustCompile(`]`) + +// inlineCodeRe matches backtick-enclosed inline code spans so we can strip them +// before checking for HTML. Otherwise `` in code would trigger a false positive. +var inlineCodeRe = regexp.MustCompile("`[^`]+`") + +// fenceMarker returns the fence prefix ("```" or "~~~") if the line opens or +// closes a fenced code block, or "" otherwise. +func fenceMarker(trimmedLine string) string { + if strings.HasPrefix(trimmedLine, "```") { + return "```" + } + if strings.HasPrefix(trimmedLine, "~~~") { + return "~~~" + } + return "" +} + +// containsHTMLOutsideCode scans the text for HTML tags that appear in prose, +// ignoring content inside fenced code blocks, indented code blocks, and inline +// code spans. Returns true if HTML is found in any prose line. +func containsHTMLOutsideCode(text string) bool { + lines := strings.Split(text, "\n") + var openFence string // tracks the opening fence marker ("```" or "~~~"), empty when outside + + for _, line := range lines { + trimmed := strings.TrimRight(line, " \t") + marker := fenceMarker(trimmed) + + // Track fenced code blocks — only the matching marker can close a block + if openFence == "" && marker != "" { + openFence = marker + continue + } + if openFence != "" { + if marker == openFence { + openFence = "" + } + continue + } + + // Skip indented code blocks (4+ spaces or tab) + if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") { + continue + } + + // Strip inline code spans, then check for HTML + stripped := inlineCodeRe.ReplaceAllString(line, "") + if htmlTagRe.MatchString(stripped) { + return true + } + } + + return false +} + +// unwrapParagraphs removes hard line breaks within plain-text paragraphs while +// preserving intentional structure: blank lines, markdown block-level syntax +// (headers, lists, blockquotes, horizontal rules), and code blocks (both fenced +// and indented). This converts the ~72-column convention used in commit messages +// into flowing text suitable for GitHub's markdown renderer. +// +// If HTML tags are found in prose (outside code blocks and inline code spans), +// the entire text is returned as-is — anyone writing raw HTML in a commit message +// is doing something intentional with formatting. +func unwrapParagraphs(text string) string { + if text == "" { + return "" + } + + // Bail if the text contains HTML tags in prose — don't mess with it. + if containsHTMLOutsideCode(text) { + return text + } + + lines := strings.Split(text, "\n") + var result []string + var paragraph []string + var openFence string // tracks the opening fence marker ("```" or "~~~"), empty when outside + + flushParagraph := func() { + if len(paragraph) > 0 { + result = append(result, strings.Join(paragraph, " ")) + paragraph = nil + } + } + + for _, line := range lines { + trimmed := strings.TrimRight(line, " \t") + marker := fenceMarker(trimmed) + + // Track fenced code blocks — only the matching marker can close a block + if openFence == "" && marker != "" { + flushParagraph() + result = append(result, line) + openFence = marker + continue + } + if openFence != "" { + result = append(result, line) + if marker == openFence { + openFence = "" + } + continue + } + + // Blank line = paragraph break + if trimmed == "" { + flushParagraph() + result = append(result, "") + continue + } + + // Preserve lines that are markdown block-level elements + if isBlockElement(trimmed) { + flushParagraph() + result = append(result, line) + continue + } + + // Indented code block (4+ spaces or tab) + if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") { + flushParagraph() + result = append(result, line) + continue + } + + // Otherwise it's a paragraph line — accumulate it + paragraph = append(paragraph, trimmed) + } + + flushParagraph() + + return strings.Join(result, "\n") +} + +// isBlockElement returns true if the line starts with markdown block-level syntax +// that should not be joined with adjacent lines. +func isBlockElement(line string) bool { + // Headers + if strings.HasPrefix(line, "#") { + return true + } + // Unordered lists + if strings.HasPrefix(line, "- ") || strings.HasPrefix(line, "* ") || strings.HasPrefix(line, "+ ") || + line == "-" || line == "*" || line == "+" { + return true + } + // Ordered lists (e.g. "1. ", "12. ") + for i, ch := range line { + if ch >= '0' && ch <= '9' { + continue + } + if ch == '.' && i > 0 && i+1 < len(line) && line[i+1] == ' ' { + return true + } + break + } + // Blockquotes + if strings.HasPrefix(line, ">") { + return true + } + // Horizontal rules (---, ***, ___) + if isHorizontalRule(line) { + return true + } + // Pipe tables + if strings.HasPrefix(line, "|") { + return true + } + return false +} + +// isHorizontalRule checks for markdown horizontal rules: three or more +// -, *, or _ characters (with optional spaces). +func isHorizontalRule(line string) bool { + stripped := strings.ReplaceAll(line, " ", "") + if len(stripped) < 3 { + return false + } + ch := stripped[0] + if ch != '-' && ch != '*' && ch != '_' { + return false + } + for _, c := range stripped { + if byte(c) != ch { + return false + } + } + return true +} diff --git a/cmd/submit_internal_test.go b/cmd/submit_internal_test.go new file mode 100644 index 0000000..5cd21d8 --- /dev/null +++ b/cmd/submit_internal_test.go @@ -0,0 +1,218 @@ +// cmd/submit_internal_test.go +// +// This file uses package cmd (not cmd_test) to unit-test unexported helpers +// like unwrapParagraphs, isBlockElement, etc. that are pure functions with no +// dependency on command wiring. The external test files (package cmd_test) +// cover command-level integration behavior. +package cmd + +import ( + "testing" +) + +func TestUnwrapParagraphs(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + { + name: "empty string", + in: "", + want: "", + }, + { + name: "single line", + in: "Hello world", + want: "Hello world", + }, + { + name: "hard-wrapped paragraph becomes single line", + in: "This is a paragraph that was\nhard-wrapped at around 72 columns\nfor the commit message.", + want: "This is a paragraph that was hard-wrapped at around 72 columns for the commit message.", + }, + { + name: "blank lines preserved as paragraph breaks", + in: "First paragraph that is\nhard-wrapped.\n\nSecond paragraph also\nhard-wrapped.", + want: "First paragraph that is hard-wrapped.\n\nSecond paragraph also hard-wrapped.", + }, + { + name: "fenced code block preserved verbatim", + in: "Before code:\n\n```go\nfunc main() {\n fmt.Println(\"hello\")\n}\n```\n\nAfter code.", + want: "Before code:\n\n```go\nfunc main() {\n fmt.Println(\"hello\")\n}\n```\n\nAfter code.", + }, + { + name: "tilde fenced code block preserved", + in: "Text before.\n\n~~~\ncode here\n~~~\n\nText after.", + want: "Text before.\n\n~~~\ncode here\n~~~\n\nText after.", + }, + { + name: "indented code block preserved", + in: "Some text.\n\n indented code line 1\n indented code line 2\n\nMore text.", + want: "Some text.\n\n indented code line 1\n indented code line 2\n\nMore text.", + }, + { + name: "unordered list items preserved", + in: "Changes:\n\n- First item\n- Second item that is\n also long\n- Third item", + // The continuation line (2-space indent) is preserved as-is; + // GitHub's markdown renderer already handles this correctly. + want: "Changes:\n\n- First item\n- Second item that is\n also long\n- Third item", + }, + { + name: "ordered list items preserved", + in: "Steps:\n\n1. First step\n2. Second step\n3. Third step", + want: "Steps:\n\n1. First step\n2. Second step\n3. Third step", + }, + { + name: "headers preserved", + in: "## Section\n\nParagraph that is\nhard-wrapped here.\n\n### Subsection\n\nAnother para.", + want: "## Section\n\nParagraph that is hard-wrapped here.\n\n### Subsection\n\nAnother para.", + }, + { + name: "blockquotes preserved", + in: "> This is a quote\n> that spans lines\n\nRegular text.", + want: "> This is a quote\n> that spans lines\n\nRegular text.", + }, + { + name: "horizontal rule preserved", + in: "Above\n\n---\n\nBelow", + want: "Above\n\n---\n\nBelow", + }, + { + name: "realistic commit message body", + in: "This commit refactors the authentication middleware to\nuse JWT tokens instead of session cookies. The change\nimproves scalability by removing server-side session\nstorage requirements.\n\nKey changes:\n\n- Replace session middleware with JWT validation\n- Add token refresh endpoint\n- Update tests to use new auth flow\n\nBreaking change: clients must now send an\n`Authorization: Bearer ` header instead of\nrelying on cookies.", + want: "This commit refactors the authentication middleware to use JWT tokens instead of session cookies. The change improves scalability by removing server-side session storage requirements.\n\nKey changes:\n\n- Replace session middleware with JWT validation\n- Add token refresh endpoint\n- Update tests to use new auth flow\n\nBreaking change: clients must now send an `Authorization: Bearer ` header instead of relying on cookies.", + }, + { + name: "pipe tables preserved", + in: "Results:\n\n| Name | Value |\n|------|-------|\n| foo | 42 |", + want: "Results:\n\n| Name | Value |\n|------|-------|\n| foo | 42 |", + }, + { + name: "trailing whitespace on wrapped lines is trimmed", + in: "Line one with trailing space \nline two.", + want: "Line one with trailing space line two.", + }, + { + name: "HTML tags cause bail-out", + in: "Some text that is\nhard-wrapped.\n\n
\nClick me\nHidden content\n
", + want: "Some text that is\nhard-wrapped.\n\n
\nClick me\nHidden content\n
", + }, + { + name: "inline HTML tag causes bail-out", + in: "This has a
in it\nand wraps.", + want: "This has a
in it\nand wraps.", + }, + { + name: "angle bracket in non-HTML context still unwraps", + in: "The value x < y is\nalways true.", + want: "The value x < y is always true.", + }, + { + name: "HTML inside fenced code block does not trigger bail-out", + in: "This adds a component.\n\n```html\n
\n hello\n
\n```\n\nThe paragraph is\nhard-wrapped here.", + want: "This adds a component.\n\n```html\n
\n hello\n
\n```\n\nThe paragraph is hard-wrapped here.", + }, + { + name: "HTML inside indented code block does not trigger bail-out", + in: "Example:\n\n
indented html
\n\nMore text that is\nhard-wrapped.", + want: "Example:\n\n
indented html
\n\nMore text that is hard-wrapped.", + }, + { + name: "HTML in prose with code block HTML still bails out", + in: "Use the
tag.\n\n```html\n
code
\n```", + want: "Use the
tag.\n\n```html\n
code
\n```", + }, + { + name: "mismatched fence markers do not close each other", + in: "Text before.\n\n```\n~~~\nstill in code\n```\n\nParagraph that is\nhard-wrapped.", + want: "Text before.\n\n```\n~~~\nstill in code\n```\n\nParagraph that is hard-wrapped.", + }, + { + name: "tilde fence with backticks inside", + in: "Text.\n\n~~~\n```\nnested marker\n~~~\n\nWrapped line\ncontinues here.", + want: "Text.\n\n~~~\n```\nnested marker\n~~~\n\nWrapped line continues here.", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := unwrapParagraphs(tt.in) + if got != tt.want { + t.Errorf("unwrapParagraphs() mismatch\n got: %q\nwant: %q", got, tt.want) + } + }) + } +} + +func TestContainsHTMLOutsideCode(t *testing.T) { + tests := []struct { + name string + in string + want bool + }{ + {"no HTML", "just plain text", false}, + {"HTML in prose", "Use
here", true}, + {"HTML in fenced code block", "```\n
hi
\n```", false}, + {"HTML in indented code block", "
hi
", false}, + {"HTML in inline code", "Use `
` for this", false}, + {"HTML in prose AND code block", "
\n\n```\n
x
\n```", true}, + {"angle bracket not HTML", "x < y", false}, + {"hyphenated custom element", "Use here", true}, + {"namespaced XML tag", "The element", true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := containsHTMLOutsideCode(tt.in) + if got != tt.want { + t.Errorf("containsHTMLOutsideCode() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIsBlockElement(t *testing.T) { + blockLines := []string{ + "# Header", + "## Header 2", + "- list item", + "* list item", + "+ list item", + "1. ordered", + "12. multi-digit ordered", + "> blockquote", + "| table row", + } + for _, line := range blockLines { + if !isBlockElement(line) { + t.Errorf("expected isBlockElement(%q) = true", line) + } + } + + nonBlockLines := []string{ + "just text", + "This starts a sentence.", + "2nd place finish", + } + for _, line := range nonBlockLines { + if isBlockElement(line) { + t.Errorf("expected isBlockElement(%q) = false", line) + } + } +} + +func TestIsHorizontalRule(t *testing.T) { + rules := []string{"---", "***", "___", "- - -", "* * *", "----", "****"} + for _, r := range rules { + if !isHorizontalRule(r) { + t.Errorf("expected isHorizontalRule(%q) = true", r) + } + } + + nonRules := []string{"--", "**", "-", "abc", "---x"} + for _, r := range nonRules { + if isHorizontalRule(r) { + t.Errorf("expected isHorizontalRule(%q) = false", r) + } + } +}