From 562c0e676d535746a09eca44a3d080bcb0c70488 Mon Sep 17 00:00:00 2001 From: Christopher Hiller Date: Wed, 11 Feb 2026 16:41:43 -0800 Subject: [PATCH 1/3] feat(submit): unwrap hard line breaks in generated PR descriptions Commit message bodies are typically hard-wrapped at ~72 columns, but GitHub renders single newlines as `
` in PR descriptions, resulting in ugly narrow paragraphs. `generatePRBody` now unwraps paragraph lines while preserving markdown structure (code blocks, lists, headers, blockquotes, tables, horizontal rules). If HTML tags are detected, the body is left as-is to avoid mangling intentional formatting. --- cmd/submit.go | 189 ++++++++++++++++++++++++++++++++- cmd/submit_internal_test.go | 201 ++++++++++++++++++++++++++++++++++++ 2 files changed, 388 insertions(+), 2 deletions(-) create mode 100644 cmd/submit_internal_test.go diff --git a/cmd/submit.go b/cmd/submit.go index 45d6097..59edce5 100644 --- a/cmd/submit.go +++ b/cmd/submit.go @@ -4,6 +4,7 @@ package cmd import ( "fmt" "os" + "regexp" "strings" "github.com/boneskull/gh-stack/internal/config" @@ -514,6 +515,10 @@ func promptMarkPRReady(ghClient *github.Client, prNumber int, branch, trunk stri // generatePRBody creates a PR description from the commits between base and head. // For a single commit: returns the commit body. // For multiple commits: returns each commit as a markdown section. +// +// Commit message bodies are unwrapped so that hard line breaks within paragraphs +// (typical of the ~72-column git convention) are removed. This produces better +// rendering in GitHub's PR description, which treats single newlines as
tags. func generatePRBody(g *git.Git, base, head string) (string, error) { commits, err := g.GetCommits(base, head) if err != nil { @@ -526,7 +531,7 @@ func generatePRBody(g *git.Git, base, head string) (string, error) { if len(commits) == 1 { // Single commit: just use the body - return commits[0].Body, nil + return unwrapParagraphs(commits[0].Body), nil } // Multiple commits: format as markdown sections @@ -540,10 +545,190 @@ func generatePRBody(g *git.Git, base, head string) (string, error) { sb.WriteString("\n") if commit.Body != "" { sb.WriteString("\n") - sb.WriteString(commit.Body) + sb.WriteString(unwrapParagraphs(commit.Body)) sb.WriteString("\n") } } return sb.String(), nil } + +// unwrapParagraphs removes hard line breaks within plain-text paragraphs while +// preserving intentional structure: blank lines, markdown block-level syntax +// (headers, lists, blockquotes, horizontal rules), and code blocks (both fenced +// and indented). This converts the ~72-column convention used in commit messages +// into flowing text suitable for GitHub's markdown renderer. +// +// If HTML tags are found in prose (outside code blocks and inline code spans), +// the entire text is returned as-is — anyone writing raw HTML in a commit message +// is doing something intentional with formatting. + +// htmlTagRe matches anything that looks like an HTML tag (e.g.
, ,
). +var htmlTagRe = regexp.MustCompile(`]`) + +// inlineCodeRe matches backtick-enclosed inline code spans so we can strip them +// before checking for HTML. Otherwise `` in code would trigger a false positive. +var inlineCodeRe = regexp.MustCompile("`[^`]+`") + +// containsHTMLOutsideCode scans the text for HTML tags that appear in prose, +// ignoring content inside fenced code blocks, indented code blocks, and inline +// code spans. Returns true if HTML is found in any prose line. +func containsHTMLOutsideCode(text string) bool { + lines := strings.Split(text, "\n") + inFencedCodeBlock := false + + for _, line := range lines { + trimmed := strings.TrimRight(line, " \t") + + // Track fenced code blocks (``` or ~~~) + if !inFencedCodeBlock && (strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~")) { + inFencedCodeBlock = true + continue + } + if inFencedCodeBlock { + if strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~") { + inFencedCodeBlock = false + } + continue + } + + // Skip indented code blocks (4+ spaces or tab) + if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") { + continue + } + + // Strip inline code spans, then check for HTML + stripped := inlineCodeRe.ReplaceAllString(line, "") + if htmlTagRe.MatchString(stripped) { + return true + } + } + + return false +} + +func unwrapParagraphs(text string) string { + if text == "" { + return "" + } + + // Bail if the text contains HTML tags in prose — don't mess with it. + if containsHTMLOutsideCode(text) { + return text + } + + lines := strings.Split(text, "\n") + var result []string + var paragraph []string + inFencedCodeBlock := false + + flushParagraph := func() { + if len(paragraph) > 0 { + result = append(result, strings.Join(paragraph, " ")) + paragraph = nil + } + } + + for _, line := range lines { + trimmed := strings.TrimRight(line, " \t") + + // Track fenced code blocks (``` or ~~~) + if !inFencedCodeBlock && (strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~")) { + flushParagraph() + result = append(result, line) + inFencedCodeBlock = true + continue + } + if inFencedCodeBlock { + result = append(result, line) + if strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~") { + inFencedCodeBlock = false + } + continue + } + + // Blank line = paragraph break + if trimmed == "" { + flushParagraph() + result = append(result, "") + continue + } + + // Preserve lines that are markdown block-level elements + if isBlockElement(trimmed) { + flushParagraph() + result = append(result, line) + continue + } + + // Indented code block (4+ spaces or tab) + if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") { + flushParagraph() + result = append(result, line) + continue + } + + // Otherwise it's a paragraph line — accumulate it + paragraph = append(paragraph, trimmed) + } + + flushParagraph() + + return strings.Join(result, "\n") +} + +// isBlockElement returns true if the line starts with markdown block-level syntax +// that should not be joined with adjacent lines. +func isBlockElement(line string) bool { + // Headers + if strings.HasPrefix(line, "#") { + return true + } + // Unordered lists + if strings.HasPrefix(line, "- ") || strings.HasPrefix(line, "* ") || strings.HasPrefix(line, "+ ") || + line == "-" || line == "*" || line == "+" { + return true + } + // Ordered lists (e.g. "1. ", "12. ") + for i, ch := range line { + if ch >= '0' && ch <= '9' { + continue + } + if ch == '.' && i > 0 && i+1 < len(line) && line[i+1] == ' ' { + return true + } + break + } + // Blockquotes + if strings.HasPrefix(line, ">") { + return true + } + // Horizontal rules (---, ***, ___) + if isHorizontalRule(line) { + return true + } + // Pipe tables + if strings.HasPrefix(line, "|") { + return true + } + return false +} + +// isHorizontalRule checks for markdown horizontal rules: three or more +// -, *, or _ characters (with optional spaces). +func isHorizontalRule(line string) bool { + stripped := strings.ReplaceAll(line, " ", "") + if len(stripped) < 3 { + return false + } + ch := stripped[0] + if ch != '-' && ch != '*' && ch != '_' { + return false + } + for _, c := range stripped { + if byte(c) != ch { + return false + } + } + return true +} diff --git a/cmd/submit_internal_test.go b/cmd/submit_internal_test.go new file mode 100644 index 0000000..9562f02 --- /dev/null +++ b/cmd/submit_internal_test.go @@ -0,0 +1,201 @@ +// cmd/submit_internal_test.go +package cmd + +import ( + "testing" +) + +func TestUnwrapParagraphs(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + { + name: "empty string", + in: "", + want: "", + }, + { + name: "single line", + in: "Hello world", + want: "Hello world", + }, + { + name: "hard-wrapped paragraph becomes single line", + in: "This is a paragraph that was\nhard-wrapped at around 72 columns\nfor the commit message.", + want: "This is a paragraph that was hard-wrapped at around 72 columns for the commit message.", + }, + { + name: "blank lines preserved as paragraph breaks", + in: "First paragraph that is\nhard-wrapped.\n\nSecond paragraph also\nhard-wrapped.", + want: "First paragraph that is hard-wrapped.\n\nSecond paragraph also hard-wrapped.", + }, + { + name: "fenced code block preserved verbatim", + in: "Before code:\n\n```go\nfunc main() {\n fmt.Println(\"hello\")\n}\n```\n\nAfter code.", + want: "Before code:\n\n```go\nfunc main() {\n fmt.Println(\"hello\")\n}\n```\n\nAfter code.", + }, + { + name: "tilde fenced code block preserved", + in: "Text before.\n\n~~~\ncode here\n~~~\n\nText after.", + want: "Text before.\n\n~~~\ncode here\n~~~\n\nText after.", + }, + { + name: "indented code block preserved", + in: "Some text.\n\n indented code line 1\n indented code line 2\n\nMore text.", + want: "Some text.\n\n indented code line 1\n indented code line 2\n\nMore text.", + }, + { + name: "unordered list items preserved", + in: "Changes:\n\n- First item\n- Second item that is\n also long\n- Third item", + // The continuation line (2-space indent) is preserved as-is; + // GitHub's markdown renderer already handles this correctly. + want: "Changes:\n\n- First item\n- Second item that is\n also long\n- Third item", + }, + { + name: "ordered list items preserved", + in: "Steps:\n\n1. First step\n2. Second step\n3. Third step", + want: "Steps:\n\n1. First step\n2. Second step\n3. Third step", + }, + { + name: "headers preserved", + in: "## Section\n\nParagraph that is\nhard-wrapped here.\n\n### Subsection\n\nAnother para.", + want: "## Section\n\nParagraph that is hard-wrapped here.\n\n### Subsection\n\nAnother para.", + }, + { + name: "blockquotes preserved", + in: "> This is a quote\n> that spans lines\n\nRegular text.", + want: "> This is a quote\n> that spans lines\n\nRegular text.", + }, + { + name: "horizontal rule preserved", + in: "Above\n\n---\n\nBelow", + want: "Above\n\n---\n\nBelow", + }, + { + name: "realistic commit message body", + in: "This commit refactors the authentication middleware to\nuse JWT tokens instead of session cookies. The change\nimproves scalability by removing server-side session\nstorage requirements.\n\nKey changes:\n\n- Replace session middleware with JWT validation\n- Add token refresh endpoint\n- Update tests to use new auth flow\n\nBreaking change: clients must now send an\n`Authorization: Bearer ` header instead of\nrelying on cookies.", + want: "This commit refactors the authentication middleware to use JWT tokens instead of session cookies. The change improves scalability by removing server-side session storage requirements.\n\nKey changes:\n\n- Replace session middleware with JWT validation\n- Add token refresh endpoint\n- Update tests to use new auth flow\n\nBreaking change: clients must now send an `Authorization: Bearer ` header instead of relying on cookies.", + }, + { + name: "pipe tables preserved", + in: "Results:\n\n| Name | Value |\n|------|-------|\n| foo | 42 |", + want: "Results:\n\n| Name | Value |\n|------|-------|\n| foo | 42 |", + }, + { + name: "trailing whitespace on wrapped lines is trimmed", + in: "Line one with trailing space \nline two.", + want: "Line one with trailing space line two.", + }, + { + name: "HTML tags cause bail-out", + in: "Some text that is\nhard-wrapped.\n\n
\nClick me\nHidden content\n
", + want: "Some text that is\nhard-wrapped.\n\n
\nClick me\nHidden content\n
", + }, + { + name: "inline HTML tag causes bail-out", + in: "This has a
in it\nand wraps.", + want: "This has a
in it\nand wraps.", + }, + { + name: "angle bracket in non-HTML context still unwraps", + in: "The value x < y is\nalways true.", + want: "The value x < y is always true.", + }, + { + name: "HTML inside fenced code block does not trigger bail-out", + in: "This adds a component.\n\n```html\n
\n hello\n
\n```\n\nThe paragraph is\nhard-wrapped here.", + want: "This adds a component.\n\n```html\n
\n hello\n
\n```\n\nThe paragraph is hard-wrapped here.", + }, + { + name: "HTML inside indented code block does not trigger bail-out", + in: "Example:\n\n
indented html
\n\nMore text that is\nhard-wrapped.", + want: "Example:\n\n
indented html
\n\nMore text that is hard-wrapped.", + }, + { + name: "HTML in prose with code block HTML still bails out", + in: "Use the
tag.\n\n```html\n
code
\n```", + want: "Use the
tag.\n\n```html\n
code
\n```", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := unwrapParagraphs(tt.in) + if got != tt.want { + t.Errorf("unwrapParagraphs() mismatch\n got: %q\nwant: %q", got, tt.want) + } + }) + } +} + +func TestContainsHTMLOutsideCode(t *testing.T) { + tests := []struct { + name string + in string + want bool + }{ + {"no HTML", "just plain text", false}, + {"HTML in prose", "Use
here", true}, + {"HTML in fenced code block", "```\n
hi
\n```", false}, + {"HTML in indented code block", "
hi
", false}, + {"HTML in inline code", "Use `
` for this", false}, + {"HTML in prose AND code block", "
\n\n```\n
x
\n```", true}, + {"angle bracket not HTML", "x < y", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := containsHTMLOutsideCode(tt.in) + if got != tt.want { + t.Errorf("containsHTMLOutsideCode() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIsBlockElement(t *testing.T) { + blockLines := []string{ + "# Header", + "## Header 2", + "- list item", + "* list item", + "+ list item", + "1. ordered", + "12. multi-digit ordered", + "> blockquote", + "| table row", + } + for _, line := range blockLines { + if !isBlockElement(line) { + t.Errorf("expected isBlockElement(%q) = true", line) + } + } + + nonBlockLines := []string{ + "just text", + "This starts a sentence.", + "2nd place finish", + } + for _, line := range nonBlockLines { + if isBlockElement(line) { + t.Errorf("expected isBlockElement(%q) = false", line) + } + } +} + +func TestIsHorizontalRule(t *testing.T) { + rules := []string{"---", "***", "___", "- - -", "* * *", "----", "****"} + for _, r := range rules { + if !isHorizontalRule(r) { + t.Errorf("expected isHorizontalRule(%q) = true", r) + } + } + + nonRules := []string{"--", "**", "-", "abc", "---x"} + for _, r := range nonRules { + if isHorizontalRule(r) { + t.Errorf("expected isHorizontalRule(%q) = false", r) + } + } +} From 210204cdf99c4eefa5abdb6400a478754b0ad79c Mon Sep 17 00:00:00 2001 From: Christopher Hiller Date: Wed, 11 Feb 2026 17:03:45 -0800 Subject: [PATCH 2/3] fix(submit): track fence markers to prevent mismatched close The fenced code block state machine treated `````` and `~~~` as interchangeable openers/closers. Now each code block tracks which marker opened it and only closes on the same marker, preventing a `~~~` inside a `````` block (or vice versa) from prematurely ending code block mode. Also adds explanatory comment to `submit_internal_test.go` for why it uses `package cmd` instead of `package cmd_test`. --- cmd/submit.go | 42 ++++++++++++++++++++++++------------- cmd/submit_internal_test.go | 15 +++++++++++++ 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/cmd/submit.go b/cmd/submit.go index 59edce5..513f104 100644 --- a/cmd/submit.go +++ b/cmd/submit.go @@ -570,24 +570,37 @@ var htmlTagRe = regexp.MustCompile(`]`) // before checking for HTML. Otherwise `` in code would trigger a false positive. var inlineCodeRe = regexp.MustCompile("`[^`]+`") +// fenceMarker returns the fence prefix ("```" or "~~~") if the line opens or +// closes a fenced code block, or "" otherwise. +func fenceMarker(trimmedLine string) string { + if strings.HasPrefix(trimmedLine, "```") { + return "```" + } + if strings.HasPrefix(trimmedLine, "~~~") { + return "~~~" + } + return "" +} + // containsHTMLOutsideCode scans the text for HTML tags that appear in prose, // ignoring content inside fenced code blocks, indented code blocks, and inline // code spans. Returns true if HTML is found in any prose line. func containsHTMLOutsideCode(text string) bool { lines := strings.Split(text, "\n") - inFencedCodeBlock := false + var openFence string // tracks the opening fence marker ("```" or "~~~"), empty when outside for _, line := range lines { trimmed := strings.TrimRight(line, " \t") + marker := fenceMarker(trimmed) - // Track fenced code blocks (``` or ~~~) - if !inFencedCodeBlock && (strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~")) { - inFencedCodeBlock = true + // Track fenced code blocks — only the matching marker can close a block + if openFence == "" && marker != "" { + openFence = marker continue } - if inFencedCodeBlock { - if strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~") { - inFencedCodeBlock = false + if openFence != "" { + if marker == openFence { + openFence = "" } continue } @@ -620,7 +633,7 @@ func unwrapParagraphs(text string) string { lines := strings.Split(text, "\n") var result []string var paragraph []string - inFencedCodeBlock := false + var openFence string // tracks the opening fence marker ("```" or "~~~"), empty when outside flushParagraph := func() { if len(paragraph) > 0 { @@ -631,18 +644,19 @@ func unwrapParagraphs(text string) string { for _, line := range lines { trimmed := strings.TrimRight(line, " \t") + marker := fenceMarker(trimmed) - // Track fenced code blocks (``` or ~~~) - if !inFencedCodeBlock && (strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~")) { + // Track fenced code blocks — only the matching marker can close a block + if openFence == "" && marker != "" { flushParagraph() result = append(result, line) - inFencedCodeBlock = true + openFence = marker continue } - if inFencedCodeBlock { + if openFence != "" { result = append(result, line) - if strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~") { - inFencedCodeBlock = false + if marker == openFence { + openFence = "" } continue } diff --git a/cmd/submit_internal_test.go b/cmd/submit_internal_test.go index 9562f02..5369544 100644 --- a/cmd/submit_internal_test.go +++ b/cmd/submit_internal_test.go @@ -1,4 +1,9 @@ // cmd/submit_internal_test.go +// +// This file uses package cmd (not cmd_test) to unit-test unexported helpers +// like unwrapParagraphs, isBlockElement, etc. that are pure functions with no +// dependency on command wiring. The external test files (package cmd_test) +// cover command-level integration behavior. package cmd import ( @@ -118,6 +123,16 @@ func TestUnwrapParagraphs(t *testing.T) { in: "Use the
tag.\n\n```html\n
code
\n```", want: "Use the
tag.\n\n```html\n
code
\n```", }, + { + name: "mismatched fence markers do not close each other", + in: "Text before.\n\n```\n~~~\nstill in code\n```\n\nParagraph that is\nhard-wrapped.", + want: "Text before.\n\n```\n~~~\nstill in code\n```\n\nParagraph that is hard-wrapped.", + }, + { + name: "tilde fence with backticks inside", + in: "Text.\n\n~~~\n```\nnested marker\n~~~\n\nWrapped line\ncontinues here.", + want: "Text.\n\n~~~\n```\nnested marker\n~~~\n\nWrapped line continues here.", + }, } for _, tt := range tests { From 83fc65598b8ecf6af29a8617cf8f1b9defa3277f Mon Sep 17 00:00:00 2001 From: Christopher Hiller Date: Wed, 11 Feb 2026 17:08:03 -0800 Subject: [PATCH 3/3] fix(submit): handle hyphenated HTML tags, fix doc comment placement - Expand `htmlTagRe` to match custom elements with hyphens (e.g. ``) and namespaced tags (e.g. ``) - Move orphaned doc comment block to directly precede `unwrapParagraphs` so it attaches to the function in godoc --- cmd/submit.go | 24 ++++++++++++------------ cmd/submit_internal_test.go | 2 ++ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/cmd/submit.go b/cmd/submit.go index 513f104..da4917e 100644 --- a/cmd/submit.go +++ b/cmd/submit.go @@ -553,18 +553,9 @@ func generatePRBody(g *git.Git, base, head string) (string, error) { return sb.String(), nil } -// unwrapParagraphs removes hard line breaks within plain-text paragraphs while -// preserving intentional structure: blank lines, markdown block-level syntax -// (headers, lists, blockquotes, horizontal rules), and code blocks (both fenced -// and indented). This converts the ~72-column convention used in commit messages -// into flowing text suitable for GitHub's markdown renderer. -// -// If HTML tags are found in prose (outside code blocks and inline code spans), -// the entire text is returned as-is — anyone writing raw HTML in a commit message -// is doing something intentional with formatting. - -// htmlTagRe matches anything that looks like an HTML tag (e.g.
, ,
). -var htmlTagRe = regexp.MustCompile(`]`) +// htmlTagRe matches anything that looks like an HTML tag, including custom +// elements with hyphens (e.g. ) and namespaced tags (e.g. ). +var htmlTagRe = regexp.MustCompile(`]`) // inlineCodeRe matches backtick-enclosed inline code spans so we can strip them // before checking for HTML. Otherwise `` in code would trigger a false positive. @@ -620,6 +611,15 @@ func containsHTMLOutsideCode(text string) bool { return false } +// unwrapParagraphs removes hard line breaks within plain-text paragraphs while +// preserving intentional structure: blank lines, markdown block-level syntax +// (headers, lists, blockquotes, horizontal rules), and code blocks (both fenced +// and indented). This converts the ~72-column convention used in commit messages +// into flowing text suitable for GitHub's markdown renderer. +// +// If HTML tags are found in prose (outside code blocks and inline code spans), +// the entire text is returned as-is — anyone writing raw HTML in a commit message +// is doing something intentional with formatting. func unwrapParagraphs(text string) string { if text == "" { return "" diff --git a/cmd/submit_internal_test.go b/cmd/submit_internal_test.go index 5369544..5cd21d8 100644 --- a/cmd/submit_internal_test.go +++ b/cmd/submit_internal_test.go @@ -158,6 +158,8 @@ func TestContainsHTMLOutsideCode(t *testing.T) { {"HTML in inline code", "Use `
` for this", false}, {"HTML in prose AND code block", "
\n\n```\n
x
\n```", true}, {"angle bracket not HTML", "x < y", false}, + {"hyphenated custom element", "Use here", true}, + {"namespaced XML tag", "The element", true}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {