Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions crates/perry-codegen/src/codegen/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,34 @@ pub(crate) fn decide_full_outline_ic(callable_count: usize) -> bool {
callable_count >= threshold
}

/// Choose the number of codegen units a module's object compilation is split
/// into (#5391). Compiling one enormous translation unit makes `clang -c` run
/// out of memory (~15GB on the 13MB bundle); with N units the compiler's peak
/// memory is bounded to roughly whole/N.
///
/// Resolution order:
/// 1. `PERRY_CODEGEN_UNITS=N` (parseable as `usize`) forces N units; `0` is
///    treated as `1`, and `1` disables splitting. The cap below is not applied.
/// 2. Modules with fewer than 8000 callables always get a single unit, so
///    ordinary per-file modules keep the unsplit path (zero behavior change).
/// 3. Otherwise `ceil(callable_count / target_per_unit)`, clamped to `1..=48`.
///    `PERRY_CODEGEN_UNIT_SIZE` (a positive integer) overrides the default
///    target of 6000 callables per unit; unparseable or zero values are ignored.
pub(crate) fn decide_codegen_units(callable_count: usize) -> usize {
    const MIN_CALLABLES_TO_SPLIT: usize = 8000;
    const MAX_UNITS: usize = 48;
    const DEFAULT_CALLABLES_PER_UNIT: usize = 6000;

    // An explicit, well-formed override wins over every heuristic.
    let forced_units = std::env::var("PERRY_CODEGEN_UNITS")
        .ok()
        .and_then(|raw| raw.parse::<usize>().ok());
    if let Some(units) = forced_units {
        return units.max(1);
    }

    // Below the floor the module is small enough to compile as one unit.
    if callable_count < MIN_CALLABLES_TO_SPLIT {
        return 1;
    }

    // A zero target is rejected here so the division below cannot divide by 0.
    let per_unit = match std::env::var("PERRY_CODEGEN_UNIT_SIZE") {
        Ok(raw) => match raw.parse::<usize>() {
            Ok(size) if size > 0 => size,
            _ => DEFAULT_CALLABLES_PER_UNIT,
        },
        Err(_) => DEFAULT_CALLABLES_PER_UNIT,
    };

    callable_count.div_ceil(per_unit).clamp(1, MAX_UNITS)
}

pub(super) fn scoped_fn_name(module_prefix: &str, hir_name: &str) -> String {
// Use the INJECTIVE sanitizer (same as scoped_static_method_name): plain
// `sanitize` maps every non-`[A-Za-z0-9_]` char to `_`, so distinct minified
Expand Down
34 changes: 28 additions & 6 deletions crates/perry-codegen/src/codegen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ mod string_pool;

pub use helpers::resolve_target_triple;
pub(crate) use helpers::{
decide_full_outline_ic, default_target_triple, full_outline_ic_enabled, module_callable_count,
set_full_outline_ic, write_barriers_enabled,
decide_codegen_units, decide_full_outline_ic, default_target_triple, full_outline_ic_enabled,
module_callable_count, set_full_outline_ic, write_barriers_enabled,
};
pub use opts::{
AppMetadata, CompileOptions, FpContractMode, ImportedClass, NamespaceEntry, NamespaceEntryKind,
Expand Down Expand Up @@ -2521,6 +2521,32 @@ pub fn compile_module(hir: &HirModule, opts: CompileOptions) -> Result<Vec<u8>>
crate::native_value::verify_native_rep_records(&llmod.native_rep_records)?;
}

crate::native_value::write_native_rep_artifact_if_enabled(
&hir.name,
&llmod.native_rep_records,
)?;

// #5391 codegen units: large modules split their object compilation into N
// independently-compiled units so clang's peak RSS stays ~whole/N instead of
// OOMing on one giant TU. Gated to large modules (default 1 unit = unchanged
// behavior). `emit_ir_only` and `PERRY_SAVE_LL` want the whole-module text,
// so they take the single-text path; the split path avoids materializing the
// full ~1GB IR string at all (which would defeat the memory win).
let n_units = if opts.emit_ir_only {
1
} else {
decide_codegen_units(module_callable_count(hir))
Comment on lines +2535 to +2538

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check if cross-compilation gating is implemented elsewhere
rg -n "PERRY_CODEGEN_UNITS|render_codegen_units|compile_units_to_object" --type rust -C 3 | head -100

Repository: PerryTS/perry

Length of output: 7863


🏁 Script executed:

# Check the full context around the n_units decision in codegen/mod.rs
sed -n '2520,2560p' crates/perry-codegen/src/codegen/mod.rs

Repository: PerryTS/perry

Length of output: 1664


🏁 Script executed:

# Check the full compile_units_to_object implementation in linker.rs
sed -n '280,320p' crates/perry-codegen/src/linker.rs

Repository: PerryTS/perry

Length of output: 1884


🏁 Script executed:

# Look for any environment variable checks related to PERRY_SAVE_LL or cross-compilation
rg -n "PERRY_SAVE_LL|is_cross|target_triple" crates/perry-codegen/src/ --type rust

Repository: PerryTS/perry

Length of output: 4435


🏁 Script executed:

# Check git log for PR references to cross-compilation and codegen units
git log --oneline --all --grep="5391\|cross-compil\|codegen.*unit" 2>/dev/null | head -20

Repository: PerryTS/perry

Length of output: 914


🏁 Script executed:

# Check if there are any comments in the code mentioning cross-compilation + codegen splitting
rg -n "cross.*compil|target.*arch" crates/perry-codegen/src/codegen/mod.rs crates/perry-codegen/src/linker.rs

Repository: PerryTS/perry

Length of output: 619


🏁 Script executed:

# Check commit messages for `#5391` to see if cross-compilation gating was mentioned
git log --all --format="%B" --grep="5391" 2>/dev/null | head -100

Repository: PerryTS/perry

Length of output: 5531


🏁 Script executed:

# Look for FIXME/TODO comments about cross-compilation in codegen units code
rg -n "FIXME|TODO|XXX" crates/perry-codegen/src/codegen/mod.rs crates/perry-codegen/src/linker.rs | grep -i "cross\|target\|compil\|unit"

Repository: PerryTS/perry

Length of output: 39


🏁 Script executed:

# Check if ld -r command gets any target-specific flags when cross-compiling
sed -n '310,340p' crates/perry-codegen/src/linker.rs

Repository: PerryTS/perry

Length of output: 1075


Add PERRY_SAVE_LL check to n_units decision logic.

The code comment at line 2532 states that both emit_ir_only and PERRY_SAVE_LL should use the single-text path, but the n_units decision at lines 2535–2537 only checks emit_ir_only. When PERRY_SAVE_LL is set, the code incorrectly proceeds to codegen splitting instead of taking the single-text path where PERRY_SAVE_LL is actually handled (line 2558). This causes the directory to be never populated because the split path returns early without reaching the PERRY_SAVE_LL save logic.

Additionally, splitting should be disabled when cross-compiling: when an explicit target is set (opts.target.is_some()), ld -r at line ~320 still uses the host linker by default, which may not support the target object format. Include opts.target.is_some() in the n_units decision to force single-unit mode.

🔧 Proposed fix
+    let save_ll = std::env::var_os("PERRY_SAVE_LL").is_some();
+    let is_cross_compile = opts.target.is_some();
-    let n_units = if opts.emit_ir_only {
+    let n_units = if opts.emit_ir_only || save_ll || is_cross_compile {
         1
     } else {
         decide_codegen_units(module_callable_count(hir))
     };
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@crates/perry-codegen/src/codegen/mod.rs` around lines 2535 - 2538, The
n_units decision logic at the assignment of n_units should check three
conditions to use single-unit mode (n_units = 1) instead of deciding based on
module_callable_count: the existing opts.emit_ir_only check, a new check for the
PERRY_SAVE_LL environment variable being set, and a cross-compilation check for
when opts.target.is_some(). Modify the condition so that if any of these three
conditions is true, n_units is set to 1; otherwise call decide_codegen_units with
module_callable_count(hir). This ensures that PERRY_SAVE_LL paths reach the save
logic at line 2558 and that cross-compilation with an explicit target uses the
single-object path instead of the host `ld -r` partial link.

};
Comment thread
coderabbitai[bot] marked this conversation as resolved.
if n_units > 1 {
let units = llmod.render_codegen_units(n_units);
log::debug!(
"perry-codegen: split '{}' into {} codegen units",
hir.name,
units.len()
);
return crate::linker::compile_units_to_object(&units, opts.target.as_deref());
}

let ll_text = llmod.to_ir();
log::debug!(
"perry-codegen: emitted {} bytes of LLVM IR for '{}' ({} interned strings)",
Expand All @@ -2533,10 +2559,6 @@ pub fn compile_module(hir: &HirModule, opts: CompileOptions) -> Result<Vec<u8>>
let filename = format!("{}/{}.ll", save_dir, module_prefix);
let _ = std::fs::write(&filename, &ll_text);
}
crate::native_value::write_native_rep_artifact_if_enabled(
&hir.name,
&llmod.native_rep_records,
)?;
if opts.emit_ir_only {
Ok(ll_text.into_bytes())
} else {
Expand Down
87 changes: 72 additions & 15 deletions crates/perry-codegen/src/codegen/string_pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,26 +217,83 @@ pub(super) fn emit_string_pool(
}
}

// #5391 function splitting: a large bundle interns ~190K strings, and
// emitting every string's init (from-bytes + nanbox + global store + GC-root
// register) into ONE `__perry_init_strings` block makes a single ~68MB
// function that `clang -O0` (forced for oversized modules, #4880) cannot
// compile in practical time — its per-function passes are superlinear in
// function size. The inits are independent (each handle is stored to its own
// global; no SSA value flows between iterations), so split them into chunk
// functions of `STRINGS_PER_CHUNK` each and call them in sequence from the
// init. Combined with codegen-unit splitting, the ~48 chunks bin-pack evenly
// across units instead of one unit carrying the monolith.
let strings_per_chunk: usize = std::env::var("PERRY_STRING_INIT_CHUNK_SIZE")
.ok()
.and_then(|v| v.parse::<usize>().ok())
.filter(|&n| n > 0)
.unwrap_or(4000);
let mut string_chunk_names: Vec<String> = Vec::new();
{
let mut count_in_chunk = strings_per_chunk; // force a new chunk on entry 0
let mut cur_idx = 0usize;
for entry in strings.iter() {
if count_in_chunk >= strings_per_chunk {
if !string_chunk_names.is_empty() {
llmod
.function_mut(cur_idx)
.unwrap()
.block_mut(0)
.unwrap()
.ret_void();
}
let cname = format!(
"__perry_init_strings_{}_chunk{}",
module_prefix,
string_chunk_names.len()
);
llmod
.define_function(&cname, VOID, vec![])
.create_block("entry");
cur_idx = llmod.function_count() - 1;
string_chunk_names.push(cname);
count_in_chunk = 0;
}
let blk = llmod.function_mut(cur_idx).unwrap().block_mut(0).unwrap();

let bytes_ref = format!("@{}", entry.bytes_global);
let handle_ref = format!("@{}", entry.handle_global);
let len_str = entry.byte_len.to_string();
let from_bytes_fn = if entry.is_wtf8 {
"js_string_from_wtf8_bytes"
} else {
"js_string_from_bytes"
};
let handle = blk.call(I64, from_bytes_fn, &[(PTR, &bytes_ref), (I32, &len_str)]);
let nanboxed = blk.call(DOUBLE, "js_nanbox_string", &[(I64, &handle)]);
crate::expr::emit_root_nanbox_store_on_block(blk, &nanboxed, &handle_ref);
let addr_i64 = blk.ptrtoint(&handle_ref, I64);
blk.call_void("js_gc_register_global_root", &[(I64, &addr_i64)]);
count_in_chunk += 1;
}
if !string_chunk_names.is_empty() {
llmod
.function_mut(cur_idx)
.unwrap()
.block_mut(0)
.unwrap()
.ret_void();
}
}

let init_name = format!("__perry_init_strings_{}", module_prefix);
let init_fn = llmod.define_function(&init_name, VOID, vec![]);
let _ = init_fn.create_block("entry");
let blk = init_fn.block_mut(0).unwrap();

for entry in strings.iter() {
let bytes_ref = format!("@{}", entry.bytes_global);
let handle_ref = format!("@{}", entry.handle_global);
let len_str = entry.byte_len.to_string();

let init_fn = if entry.is_wtf8 {
"js_string_from_wtf8_bytes"
} else {
"js_string_from_bytes"
};
let handle = blk.call(I64, init_fn, &[(PTR, &bytes_ref), (I32, &len_str)]);
let nanboxed = blk.call(DOUBLE, "js_nanbox_string", &[(I64, &handle)]);
crate::expr::emit_root_nanbox_store_on_block(blk, &nanboxed, &handle_ref);
let addr_i64 = blk.ptrtoint(&handle_ref, I64);
blk.call_void("js_gc_register_global_root", &[(I64, &addr_i64)]);
// Run the string-init chunks first, in order (each populates its slice of
// the string-handle globals before any user code runs).
for cname in &string_chunk_names {
blk.call_void(cname, &[]);
}

// Register display names for top-level user functions so
Expand Down
14 changes: 14 additions & 0 deletions crates/perry-codegen/src/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,20 @@ impl LlFunction {
self.blocks.last().map(|b| b.label.as_str())
}

/// Approximate the size in bytes of this function's rendered IR without
/// actually rendering it, so codegen-unit partitioning (#5391) can balance
/// units cheaply.
///
/// The estimate is the sum of:
/// - each instruction's length plus 1 (presumably the line terminator),
/// - each block's label length plus a flat 4 bytes of per-block overhead,
/// - each entry alloca's length plus 1,
/// - the function name's length plus a flat 64 bytes for the header.
pub fn estimated_ir_bytes(&self) -> usize {
    // Fixed per-function overhead: the name plus a constant allowance.
    let mut total = self.name.len() + 64;

    for block in self.blocks.iter() {
        total += block.label.len() + 4;
        for inst in block.instructions_iter() {
            total += inst.len() + 1;
        }
    }

    for alloca in self.entry_allocas.iter() {
        total += alloca.len() + 1;
    }

    total
}

pub fn to_ir(&self) -> String {
let param_str = self
.params
Expand Down
60 changes: 60 additions & 0 deletions crates/perry-codegen/src/linker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,66 @@ pub fn compile_ll_to_object(ll_text: &str, target_triple: Option<&str>) -> Resul
Ok(bytes)
}

/// Compile a module that was split into codegen units (#5391) to a SINGLE
/// object file's bytes. Each unit `.ll` (from `LlModule::render_codegen_units`)
/// is compiled independently by `compile_ll_to_object` — bounding peak compiler
/// memory to roughly one unit's worth instead of the whole module — and the
/// resulting objects are merged with a partial link (`ld -r`) into one object,
/// preserving `compile_module`'s single-`Vec<u8>` contract and the existing
/// one-object link path. Units are compiled sequentially so peak RSS stays at
/// one unit.
///
/// Zero or one unit short-circuits to a plain `compile_ll_to_object` call with
/// no temp files and no partial link. The linker binary defaults to `ld` and
/// can be overridden with `PERRY_LD`.
///
/// Temporary per-unit objects and the merged object are removed on BOTH the
/// success and the failure paths, unless `PERRY_LLVM_KEEP_IR` is set (in which
/// case they are left in the temp dir for inspection).
///
/// # Errors
///
/// Returns an error if any unit fails to compile, a unit object cannot be
/// written, the partial linker cannot be spawned or exits unsuccessfully, or
/// the merged object cannot be read back.
pub fn compile_units_to_object(units: &[String], target_triple: Option<&str>) -> Result<Vec<u8>> {
    match units {
        [] => return compile_ll_to_object("", target_triple),
        [only] => return compile_ll_to_object(only, target_triple),
        _ => {}
    }

    let tmp_dir = env::temp_dir();
    let pid = std::process::id();
    // pid + process-wide nonce keep concurrent compilations from colliding.
    let nonce = TEMP_NONCE_COUNTER.fetch_add(1, Ordering::Relaxed);
    let combined = tmp_dir.join(format!("perry_cgu_{}_{}_combined.o", pid, nonce));

    let mut obj_paths: Vec<PathBuf> = Vec::with_capacity(units.len());

    // All fallible work runs inside this closure so that an early `?` return
    // still falls through to the temp-file cleanup below instead of leaking
    // the objects already written for earlier units.
    let result = (|| -> Result<Vec<u8>> {
        for (i, unit) in units.iter().enumerate() {
            let bytes = compile_ll_to_object(unit, target_triple).with_context(|| {
                format!("codegen unit {}/{} failed to compile", i + 1, units.len())
            })?;
            let p = tmp_dir.join(format!("perry_cgu_{}_{}_{}.o", pid, nonce, i));
            let written = fs::write(&p, &bytes)
                .with_context(|| format!("failed to write codegen-unit object {}", p.display()));
            // Track the path before propagating a write error so a partially
            // written file is cleaned up as well.
            obj_paths.push(p);
            written?;
        }

        let ld = env::var("PERRY_LD").unwrap_or_else(|_| "ld".to_string());
        let mut cmd = Command::new(&ld);
        cmd.arg("-r").arg("-o").arg(&combined);
        for p in obj_paths.iter() {
            cmd.arg(p);
        }
        let out = cmd
            .output()
            .with_context(|| format!("failed to invoke partial linker `{} -r`", ld))?;

        if out.status.success() {
            fs::read(&combined)
                .with_context(|| format!("failed to read merged object {}", combined.display()))
        } else {
            Err(anyhow!(
                "partial link `{} -r` of {} codegen units failed (status={}).\nstderr:\n{}",
                ld,
                units.len(),
                out.status,
                String::from_utf8_lossy(&out.stderr)
            ))
        }
    })();

    // Best-effort cleanup: removal errors (e.g. the merged object was never
    // created because an earlier step failed) are deliberately ignored.
    if env::var_os("PERRY_LLVM_KEEP_IR").is_none() {
        for p in &obj_paths {
            let _ = fs::remove_file(p);
        }
        let _ = fs::remove_file(&combined);
    }
    result
}

fn json_string(value: &str) -> String {
let mut out = String::with_capacity(value.len() + 2);
out.push('"');
Expand Down
Loading
Loading