Skip to content

Commit e25fdac

Browse files
committed
feat(submit): unwrap hard line breaks in generated PR descriptions
Commit message bodies are typically hard-wrapped at ~72 columns, but GitHub renders single newlines as `<br>` in PR descriptions, resulting in ugly narrow paragraphs. `generatePRBody` now unwraps paragraph lines while preserving markdown structure (code blocks, lists, headers, blockquotes, tables, horizontal rules). If HTML tags are detected, the body is left as-is to avoid mangling intentional formatting.
1 parent f23002e commit e25fdac

2 files changed

Lines changed: 311 additions & 2 deletions

File tree

cmd/submit.go

Lines changed: 149 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package cmd
44
import (
55
"fmt"
66
"os"
7+
"regexp"
78
"strings"
89

910
"github.com/boneskull/gh-stack/internal/config"
@@ -514,6 +515,10 @@ func promptMarkPRReady(ghClient *github.Client, prNumber int, branch, trunk stri
514515
// generatePRBody creates a PR description from the commits between base and head.
515516
// For a single commit: returns the commit body.
516517
// For multiple commits: returns each commit as a markdown section.
518+
//
519+
// Commit message bodies are unwrapped so that hard line breaks within paragraphs
520+
// (typical of the ~72-column git convention) are removed. This produces better
521+
// rendering in GitHub's PR description, which treats single newlines as <br> tags.
517522
func generatePRBody(g *git.Git, base, head string) (string, error) {
518523
commits, err := g.GetCommits(base, head)
519524
if err != nil {
@@ -526,7 +531,7 @@ func generatePRBody(g *git.Git, base, head string) (string, error) {
526531

527532
if len(commits) == 1 {
528533
// Single commit: just use the body
529-
return commits[0].Body, nil
534+
return unwrapParagraphs(commits[0].Body), nil
530535
}
531536

532537
// Multiple commits: format as markdown sections
@@ -540,10 +545,152 @@ func generatePRBody(g *git.Git, base, head string) (string, error) {
540545
sb.WriteString("\n")
541546
if commit.Body != "" {
542547
sb.WriteString("\n")
543-
sb.WriteString(commit.Body)
548+
sb.WriteString(unwrapParagraphs(commit.Body))
544549
sb.WriteString("\n")
545550
}
546551
}
547552

548553
return sb.String(), nil
549554
}
555+
556+
// unwrapParagraphs removes hard line breaks within plain-text paragraphs while
557+
// preserving intentional structure: blank lines, markdown block-level syntax
558+
// (headers, lists, blockquotes, horizontal rules), and code blocks (both fenced
559+
// and indented). This converts the ~72-column convention used in commit messages
560+
// into flowing text suitable for GitHub's markdown renderer.
561+
// htmlTagRe matches anything that looks like an HTML tag (e.g. <div>, </span>, <br/>).
562+
// If we see one anywhere in the text, we bail on unwrapping entirely — anyone writing
563+
// raw HTML in a commit message is doing something intentional with formatting.
564+
var htmlTagRe = regexp.MustCompile(`</?[a-zA-Z][a-zA-Z0-9]*[\s/>]`)
565+
566+
// inlineCodeRe matches backtick-enclosed inline code spans so we can strip them
567+
// before checking for HTML. Otherwise `<token>` in code would trigger a false positive.
568+
var inlineCodeRe = regexp.MustCompile("`[^`]+`")
569+
570+
func unwrapParagraphs(text string) string {
571+
if text == "" {
572+
return ""
573+
}
574+
575+
// Bail if the text contains HTML tags — don't mess with it.
576+
// Strip inline code spans first so that e.g. `<token>` doesn't false-positive.
577+
stripped := inlineCodeRe.ReplaceAllString(text, "")
578+
if htmlTagRe.MatchString(stripped) {
579+
return text
580+
}
581+
582+
lines := strings.Split(text, "\n")
583+
var result []string
584+
var paragraph []string
585+
inFencedCodeBlock := false
586+
587+
flushParagraph := func() {
588+
if len(paragraph) > 0 {
589+
result = append(result, strings.Join(paragraph, " "))
590+
paragraph = nil
591+
}
592+
}
593+
594+
for _, line := range lines {
595+
trimmed := strings.TrimRight(line, " \t")
596+
597+
// Track fenced code blocks (``` or ~~~)
598+
if !inFencedCodeBlock && (strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~")) {
599+
flushParagraph()
600+
result = append(result, line)
601+
inFencedCodeBlock = true
602+
continue
603+
}
604+
if inFencedCodeBlock {
605+
result = append(result, line)
606+
if strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~") {
607+
inFencedCodeBlock = false
608+
}
609+
continue
610+
}
611+
612+
// Blank line = paragraph break
613+
if trimmed == "" {
614+
flushParagraph()
615+
result = append(result, "")
616+
continue
617+
}
618+
619+
// Preserve lines that are markdown block-level elements
620+
if isBlockElement(trimmed) {
621+
flushParagraph()
622+
result = append(result, line)
623+
continue
624+
}
625+
626+
// Indented code block (4+ spaces or tab)
627+
if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") {
628+
flushParagraph()
629+
result = append(result, line)
630+
continue
631+
}
632+
633+
// Otherwise it's a paragraph line — accumulate it
634+
paragraph = append(paragraph, trimmed)
635+
}
636+
637+
flushParagraph()
638+
639+
return strings.Join(result, "\n")
640+
}
641+
642+
// isBlockElement returns true if the line starts with markdown block-level syntax
643+
// that should not be joined with adjacent lines.
644+
func isBlockElement(line string) bool {
645+
// Headers
646+
if strings.HasPrefix(line, "#") {
647+
return true
648+
}
649+
// Unordered lists
650+
if strings.HasPrefix(line, "- ") || strings.HasPrefix(line, "* ") || strings.HasPrefix(line, "+ ") ||
651+
line == "-" || line == "*" || line == "+" {
652+
return true
653+
}
654+
// Ordered lists (e.g. "1. ", "12. ")
655+
for i, ch := range line {
656+
if ch >= '0' && ch <= '9' {
657+
continue
658+
}
659+
if ch == '.' && i > 0 && i+1 < len(line) && line[i+1] == ' ' {
660+
return true
661+
}
662+
break
663+
}
664+
// Blockquotes
665+
if strings.HasPrefix(line, ">") {
666+
return true
667+
}
668+
// Horizontal rules (---, ***, ___)
669+
if isHorizontalRule(line) {
670+
return true
671+
}
672+
// Pipe tables
673+
if strings.HasPrefix(line, "|") {
674+
return true
675+
}
676+
return false
677+
}
678+
679+
// isHorizontalRule checks for markdown horizontal rules: three or more
680+
// -, *, or _ characters (with optional spaces).
681+
func isHorizontalRule(line string) bool {
682+
stripped := strings.ReplaceAll(line, " ", "")
683+
if len(stripped) < 3 {
684+
return false
685+
}
686+
ch := stripped[0]
687+
if ch != '-' && ch != '*' && ch != '_' {
688+
return false
689+
}
690+
for _, c := range stripped {
691+
if byte(c) != ch {
692+
return false
693+
}
694+
}
695+
return true
696+
}

cmd/submit_internal_test.go

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
// cmd/submit_internal_test.go
2+
package cmd
3+
4+
import (
5+
"testing"
6+
)
7+
8+
func TestUnwrapParagraphs(t *testing.T) {
9+
tests := []struct {
10+
name string
11+
in string
12+
want string
13+
}{
14+
{
15+
name: "empty string",
16+
in: "",
17+
want: "",
18+
},
19+
{
20+
name: "single line",
21+
in: "Hello world",
22+
want: "Hello world",
23+
},
24+
{
25+
name: "hard-wrapped paragraph becomes single line",
26+
in: "This is a paragraph that was\nhard-wrapped at around 72 columns\nfor the commit message.",
27+
want: "This is a paragraph that was hard-wrapped at around 72 columns for the commit message.",
28+
},
29+
{
30+
name: "blank lines preserved as paragraph breaks",
31+
in: "First paragraph that is\nhard-wrapped.\n\nSecond paragraph also\nhard-wrapped.",
32+
want: "First paragraph that is hard-wrapped.\n\nSecond paragraph also hard-wrapped.",
33+
},
34+
{
35+
name: "fenced code block preserved verbatim",
36+
in: "Before code:\n\n```go\nfunc main() {\n fmt.Println(\"hello\")\n}\n```\n\nAfter code.",
37+
want: "Before code:\n\n```go\nfunc main() {\n fmt.Println(\"hello\")\n}\n```\n\nAfter code.",
38+
},
39+
{
40+
name: "tilde fenced code block preserved",
41+
in: "Text before.\n\n~~~\ncode here\n~~~\n\nText after.",
42+
want: "Text before.\n\n~~~\ncode here\n~~~\n\nText after.",
43+
},
44+
{
45+
name: "indented code block preserved",
46+
in: "Some text.\n\n indented code line 1\n indented code line 2\n\nMore text.",
47+
want: "Some text.\n\n indented code line 1\n indented code line 2\n\nMore text.",
48+
},
49+
{
50+
name: "unordered list items preserved",
51+
in: "Changes:\n\n- First item\n- Second item that is\n also long\n- Third item",
52+
// The continuation line (2-space indent) is preserved as-is;
53+
// GitHub's markdown renderer already handles this correctly.
54+
want: "Changes:\n\n- First item\n- Second item that is\n also long\n- Third item",
55+
},
56+
{
57+
name: "ordered list items preserved",
58+
in: "Steps:\n\n1. First step\n2. Second step\n3. Third step",
59+
want: "Steps:\n\n1. First step\n2. Second step\n3. Third step",
60+
},
61+
{
62+
name: "headers preserved",
63+
in: "## Section\n\nParagraph that is\nhard-wrapped here.\n\n### Subsection\n\nAnother para.",
64+
want: "## Section\n\nParagraph that is hard-wrapped here.\n\n### Subsection\n\nAnother para.",
65+
},
66+
{
67+
name: "blockquotes preserved",
68+
in: "> This is a quote\n> that spans lines\n\nRegular text.",
69+
want: "> This is a quote\n> that spans lines\n\nRegular text.",
70+
},
71+
{
72+
name: "horizontal rule preserved",
73+
in: "Above\n\n---\n\nBelow",
74+
want: "Above\n\n---\n\nBelow",
75+
},
76+
{
77+
name: "realistic commit message body",
78+
in: "This commit refactors the authentication middleware to\nuse JWT tokens instead of session cookies. The change\nimproves scalability by removing server-side session\nstorage requirements.\n\nKey changes:\n\n- Replace session middleware with JWT validation\n- Add token refresh endpoint\n- Update tests to use new auth flow\n\nBreaking change: clients must now send an\n`Authorization: Bearer <token>` header instead of\nrelying on cookies.",
79+
want: "This commit refactors the authentication middleware to use JWT tokens instead of session cookies. The change improves scalability by removing server-side session storage requirements.\n\nKey changes:\n\n- Replace session middleware with JWT validation\n- Add token refresh endpoint\n- Update tests to use new auth flow\n\nBreaking change: clients must now send an `Authorization: Bearer <token>` header instead of relying on cookies.",
80+
},
81+
{
82+
name: "pipe tables preserved",
83+
in: "Results:\n\n| Name | Value |\n|------|-------|\n| foo | 42 |",
84+
want: "Results:\n\n| Name | Value |\n|------|-------|\n| foo | 42 |",
85+
},
86+
{
87+
name: "trailing whitespace on wrapped lines is trimmed",
88+
in: "Line one with trailing space \nline two.",
89+
want: "Line one with trailing space line two.",
90+
},
91+
{
92+
name: "HTML tags cause bail-out",
93+
in: "Some text that is\nhard-wrapped.\n\n<details>\n<summary>Click me</summary>\nHidden content\n</details>",
94+
want: "Some text that is\nhard-wrapped.\n\n<details>\n<summary>Click me</summary>\nHidden content\n</details>",
95+
},
96+
{
97+
name: "inline HTML tag causes bail-out",
98+
in: "This has a <br/> in it\nand wraps.",
99+
want: "This has a <br/> in it\nand wraps.",
100+
},
101+
{
102+
name: "angle bracket in non-HTML context still unwraps",
103+
in: "The value x < y is\nalways true.",
104+
want: "The value x < y is always true.",
105+
},
106+
}
107+
108+
for _, tt := range tests {
109+
t.Run(tt.name, func(t *testing.T) {
110+
got := unwrapParagraphs(tt.in)
111+
if got != tt.want {
112+
t.Errorf("unwrapParagraphs() mismatch\n got: %q\nwant: %q", got, tt.want)
113+
}
114+
})
115+
}
116+
}
117+
118+
func TestIsBlockElement(t *testing.T) {
119+
blockLines := []string{
120+
"# Header",
121+
"## Header 2",
122+
"- list item",
123+
"* list item",
124+
"+ list item",
125+
"1. ordered",
126+
"12. multi-digit ordered",
127+
"> blockquote",
128+
"| table row",
129+
}
130+
for _, line := range blockLines {
131+
if !isBlockElement(line) {
132+
t.Errorf("expected isBlockElement(%q) = true", line)
133+
}
134+
}
135+
136+
nonBlockLines := []string{
137+
"just text",
138+
"This starts a sentence.",
139+
"2nd place finish",
140+
}
141+
for _, line := range nonBlockLines {
142+
if isBlockElement(line) {
143+
t.Errorf("expected isBlockElement(%q) = false", line)
144+
}
145+
}
146+
}
147+
148+
func TestIsHorizontalRule(t *testing.T) {
149+
rules := []string{"---", "***", "___", "- - -", "* * *", "----", "****"}
150+
for _, r := range rules {
151+
if !isHorizontalRule(r) {
152+
t.Errorf("expected isHorizontalRule(%q) = true", r)
153+
}
154+
}
155+
156+
nonRules := []string{"--", "**", "-", "abc", "---x"}
157+
for _, r := range nonRules {
158+
if isHorizontalRule(r) {
159+
t.Errorf("expected isHorizontalRule(%q) = false", r)
160+
}
161+
}
162+
}

0 commit comments

Comments
 (0)