Skip to content

Commit bb4af8a

Browse files
feat(abi): universal handle-based wasm module ABI v1 + edge-pdk SDK
1 parent c58676e commit bb4af8a

18 files changed

Lines changed: 2459 additions & 505 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[workspace]
2-
members = ["compiler"]
2+
members = ["compiler", "edge-pdk", "edge-pdk/macros", "examples/slugify-mod"]
33
resolver = "2"
44

55
[workspace.package]

compiler/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ repository.workspace = true
77
description = "Single-pass SSA compiler based on CPython 3.13: hand-written lexer, token-to-bytecode parser, threaded-code virtual machine with NaN-boxed values, inline caching, template memoization, and a mark-sweep garbage collector. Ships as a WebAssembly module."
88
readme = "README.md"
99
autotests = false
10+
autobins = false
1011

1112
[lib]
1213
name = "compiler_lib"

compiler/src/abi.rs

Lines changed: 383 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,383 @@
1+
/* ===========================================================================
2+
* EDGE PYTHON — WASM MODULE ABI v1 (sealed contract)
3+
* ===========================================================================
4+
*
5+
* ┌──────────────────────────────────────────────────────────────────┐
6+
* │ S E A L E D C O N T R A C T │
7+
* ├──────────────────────────────────────────────────────────────────┤
8+
* │ │
9+
* │ This module defines the Edge Python wasm-abi v1. The op codes, │
10+
* │ tag values, error kinds, and primitive ABI helpers below form │
11+
* │ the public contract every host that loads a guest `.wasm` │
12+
* │ module must honour, and every guest module already in the wild │
13+
* │ relies on. │
14+
* │ │
15+
* │ DO NOT MODIFY the numeric values, the function signatures, or │
16+
* │ the layout of `HandleTable` / `ErrorStash`. │
17+
* │ │
18+
* │ Bug fixes — correcting divergences from the contract — are the │
19+
* │ only acceptable maintenance. New capabilities arrive as new │
20+
* │ values inside the existing `Op` enum (consumed by `edge_op`), │
21+
* │ never as new imports or signature breaks. │
22+
* │ │
23+
* │ Lives in `compiler/src/abi.rs`. The orchestration that wires │
24+
* │ this module to the Edge Python parser/VM is in `main.rs`. The │
25+
* │ reference author-side SDK is the `edge-pdk` crate. See │
26+
* │ `documentation/reference/wasm-abi.md` for the user-facing spec │
27+
* │ and worked examples (Rust + Python). │
28+
* │ │
29+
* └──────────────────────────────────────────────────────────────────┘
30+
*
31+
* GUEST EXPORT SHAPE
32+
*
33+
* extern "C" fn <name>(argv: *const u32, argc: u32, out: *mut u32) -> i32;
34+
*
35+
* argv : pointer (in guest linear memory) to `argc` opaque host-managed
36+
* handles (u32) — one per positional argument.
37+
* argc : positional argument count.
38+
* out : pointer (in guest linear memory) where the guest writes ONE
39+
* handle for the return value.
40+
* return: 0 = success, 1 = error (host pulls via `edge_take_error`).
41+
*
42+
* Plus the obligatory:
43+
*
44+
* extern "C" fn __edge_alloc(size: u32) -> *mut u8;
45+
*
46+
* Used by the host shim to stage argv arrays in the guest's linear
47+
* memory before invoking each export.
48+
*
49+
* GUEST-SIDE IMPORTS (from `env`)
50+
*
51+
* fn edge_op(op, recv, name_ptr, name_len, argv_ptr, argc, out) -> i32;
52+
* fn edge_encode(tag, ptr, len) -> u32;
53+
* fn edge_decode(h, out_tag, dst, dst_max) -> i32;
54+
* fn edge_release(h);
55+
* fn edge_throw(kind, msg_ptr, msg_len);
56+
* fn edge_take_error(out_kind, dst, dst_max) -> i32;
57+
*
58+
* These six functions are the totality of the wire. Their full text is
59+
* at `documentation/reference/wasm-abi.md`.
60+
*
61+
* THIS MODULE'S ROLE
62+
*
63+
* `abi` is the host-internal, VM-agnostic half of the contract: it
64+
* owns the sealed numeric values (Op / Tag / ErrorKind), the handle
65+
* table (refcounted u32 → u64 Val bits), the error stash, and the
66+
* primitive codec for None / Bool / Int / Float / Bytes.
67+
*
68+
* `main.rs` is the WASM orchestration that injects this module as a
69+
* dependency: it owns the WasmHostResolver, the parser/VM lifecycle,
70+
* the JS imports (js_print / js_call_native / js_fetch_bytes), and
71+
* the VM-coupled dispatch (Op::Call → method lookup, etc.). The split
72+
* keeps the contract free of VM-specific churn — extending the
73+
* parser, retiring opcodes from the VM, or swapping out the heap
74+
* layout never requires touching this file.
75+
* =========================================================================== */
76+
77+
use alloc::{string::String, vec::Vec};
78+
79+
/* ---------- Op codes (sealed) --------------------------------------- */
80+
81+
/// Operation selector consumed by `edge_op`.
///
/// Each discriminant is a sealed wire value of wasm-abi v1: the numbers
/// below must never be renumbered.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u32)]
pub enum Op {
    Call = 0,
    GetAttr = 1,
    SetAttr = 2,
    GetItem = 3,
    SetItem = 4,
    Len = 5,
    Iter = 6,
    IterNext = 7,
}

impl Op {
    /// Every defined operation, in ascending wire-value order.
    const ALL: [Op; 8] = [
        Op::Call,
        Op::GetAttr,
        Op::SetAttr,
        Op::GetItem,
        Op::SetItem,
        Op::Len,
        Op::Iter,
        Op::IterNext,
    ];

    /// Translate a raw wire value into its `Op`.
    ///
    /// Returns `None` when `op` does not name a defined operation.
    pub fn from_u32(op: u32) -> Option<Self> {
        // Comparing against each variant's own discriminant keeps this
        // lookup tied to the enum definition above.
        Self::ALL
            .iter()
            .copied()
            .find(|candidate| (*candidate as u32) == op)
    }
}
110+
111+
/* ---------- Tags (sealed) ------------------------------------------- */
112+
113+
/// Type tag of a primitive value exchanged through `edge_encode` and
/// `edge_decode`.
///
/// Each discriminant is a sealed wire value of wasm-abi v1.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u32)]
pub enum Tag {
    None = 0,
    Bool = 1,
    Int = 2,
    Float = 3,
    /// UTF-8 bytes: encoding builds a `str`, and decoding a `str`
    /// yields its bytes.
    Bytes = 4,
}

impl Tag {
    /// Every defined tag, in ascending wire-value order.
    const ALL: [Tag; 5] = [Tag::None, Tag::Bool, Tag::Int, Tag::Float, Tag::Bytes];

    /// Translate a raw wire value into its `Tag`.
    ///
    /// Returns `None` when `t` does not name a defined tag.
    pub fn from_u32(t: u32) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|candidate| (*candidate as u32) == t)
    }
}

/// Sentinel tag that `edge_decode` reports for an invalid handle or a
/// non-primitive value; the caller should go through `edge_op` instead.
pub const TAG_INVALID: u32 = u32::MAX;
142+
143+
/* ---------- Error kinds (sealed) ------------------------------------ */
144+
145+
/// Error category attached to a stashed error.
///
/// Each discriminant is a sealed wire value of wasm-abi v1; the numeric
/// form (`kind as u32`) is what `ErrorStash` stores.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u32)]
pub enum ErrorKind {
    Type = 0,
    Value = 1,
    Runtime = 2,
    Attribute = 3,
    Index = 4,
    Key = 5,
    Custom = 6,
}
157+
158+
/* ---------- Handle table -------------------------------------------- */
159+
160+
/// Entry of the handle table. A refcount of zero marks the slot as
/// released and parked on the free list. The handle handed out for a
/// slot is `index + 1`, which keeps handle `0` reserved for "invalid".
struct HandleSlot {
    /// Raw `u64` bits of the host's `Val`. This table stores and returns
    /// them without interpreting them.
    val: u64,
    /// Reference count; `0` once the slot has been released.
    rc: u32,
}

/// Refcounted `u32 → u64` map from handles to host-side `Val` bits.
/// Process-wide: the host typically keeps a single instance alive for a
/// script run and empties it via `reset_modules()`.
#[derive(Default)]
pub struct HandleTable {
    slots: Vec<HandleSlot>,
    free_list: Vec<u32>,
}

impl HandleTable {
    /// Create an empty table (usable in `const` / `static` context).
    pub const fn new() -> Self {
        HandleTable {
            slots: Vec::new(),
            free_list: Vec::new(),
        }
    }

    /// Drop every slot and forget the free list. Called by the host
    /// between runs.
    pub fn clear(&mut self) {
        self.free_list.clear();
        self.slots.clear();
    }

    /// Store `val` and return a fresh handle whose refcount is 1.
    ///
    /// A previously released slot is reused when one is available;
    /// otherwise the table grows by one slot.
    pub fn put(&mut self, val: u64) -> u32 {
        let fresh = HandleSlot { val, rc: 1 };
        match self.free_list.pop() {
            Some(idx) => {
                self.slots[idx as usize] = fresh;
                idx + 1
            }
            None => {
                self.slots.push(fresh);
                // The new slot is the last one, so its handle (index + 1)
                // equals the new length.
                self.slots.len() as u32
            }
        }
    }

    /// Fetch the value behind handle `h`.
    ///
    /// Returns `None` for handle `0`, an out-of-range handle, or a slot
    /// that has already been released.
    pub fn get(&self, h: u32) -> Option<u64> {
        let idx = h.checked_sub(1)? as usize;
        match self.slots.get(idx) {
            Some(slot) if slot.rc > 0 => Some(slot.val),
            _ => None,
        }
    }

    /// Drop one reference to handle `h`, returning the slot to the free
    /// list when the count reaches zero.
    ///
    /// Handle `0`, out-of-range handles and already-released slots are
    /// ignored, so a double release is harmless.
    pub fn release(&mut self, h: u32) {
        let Some(idx) = h.checked_sub(1) else { return };
        match self.slots.get_mut(idx as usize) {
            Some(slot) if slot.rc > 0 => {
                slot.rc -= 1;
                if slot.rc == 0 {
                    self.free_list.push(idx);
                }
            }
            _ => {}
        }
    }
}
225+
226+
/* ---------- Error stash --------------------------------------------- */
227+
228+
/// Single-slot stash drained by `edge_take_error`. The host populates
229+
/// it from its own dispatch failures and from `edge_throw` calls.
230+
#[derive(Default)]
231+
pub struct ErrorStash(Option<(u32, String)>);
232+
233+
impl ErrorStash {
234+
pub const fn new() -> Self { Self(None) }
235+
pub fn clear(&mut self) { self.0 = None; }
236+
237+
/// Replace any pending error with `(kind, msg)`.
238+
pub fn set(&mut self, kind: u32, msg: String) {
239+
self.0 = Some((kind, msg));
240+
}
241+
242+
/// Convenience: stash a typed error.
243+
pub fn set_typed(&mut self, kind: ErrorKind, msg: String) {
244+
self.0 = Some((kind as u32, msg));
245+
}
246+
247+
/// Take the error if present.
248+
pub fn take(&mut self) -> Option<(u32, String)> { self.0.take() }
249+
250+
/// Peek without consuming. Used by `edge_take_error`'s
251+
/// "buffer too small" path so the error stays pending for retry.
252+
pub fn peek(&self) -> Option<(u32, &str)> {
253+
self.0.as_ref().map(|(k, m)| (*k, m.as_str()))
254+
}
255+
}
256+
257+
/* ---------- Primitive codec helpers --------------------------------- */
258+
259+
/// Outcome of classifying the raw bytes a guest handed to `edge_encode`.
///
/// * `Direct` — a primitive whose final `Val` bits were computed by this
///   module.
/// * `AllocStr` — a UTF-8 string that the host must allocate on its own
///   heap; it borrows from the guest-supplied bytes.
/// * `Invalid` — the payload was malformed.
pub enum EncodeRequest<'a> {
    Direct(u64),
    AllocStr(&'a str),
    Invalid,
}
268+
269+
/// Inspect the bytes a guest passed to `edge_encode` and decide how
270+
/// the host should materialize the value. The NaN-boxing layout for
271+
/// None / Bool / Int / Float lives here (and only here): changing it
272+
/// requires bumping the wasm-abi version.
273+
pub fn classify_encode(tag: u32, bytes: &[u8]) -> EncodeRequest<'_> {
274+
/* NaN-boxing constants — must match the host's `Val` impl. */
275+
const QNAN: u64 = 0x7FFC_0000_0000_0000;
276+
const TAG_NONE_BITS: u64 = QNAN | 1;
277+
const TAG_TRUE_BITS: u64 = QNAN | 2;
278+
const TAG_FALSE_BITS: u64 = QNAN | 3;
279+
const TAG_INT_BITS: u64 = QNAN | 0x8000_0000_0000_0000;
280+
281+
match Tag::from_u32(tag) {
282+
Some(Tag::None) => EncodeRequest::Direct(TAG_NONE_BITS),
283+
Some(Tag::Bool) => {
284+
let b = !bytes.is_empty() && bytes[0] != 0;
285+
EncodeRequest::Direct(if b { TAG_TRUE_BITS } else { TAG_FALSE_BITS })
286+
}
287+
Some(Tag::Int) => {
288+
if bytes.len() != 8 { return EncodeRequest::Invalid; }
289+
let mut buf = [0u8; 8];
290+
buf.copy_from_slice(bytes);
291+
let i = i64::from_le_bytes(buf);
292+
EncodeRequest::Direct(TAG_INT_BITS | (i as u64 & 0x0000_FFFF_FFFF_FFFF))
293+
}
294+
Some(Tag::Float) => {
295+
if bytes.len() != 8 { return EncodeRequest::Invalid; }
296+
let mut buf = [0u8; 8];
297+
buf.copy_from_slice(bytes);
298+
EncodeRequest::Direct(f64::from_le_bytes(buf).to_bits())
299+
}
300+
Some(Tag::Bytes) => match core::str::from_utf8(bytes) {
301+
Ok(s) => EncodeRequest::AllocStr(s),
302+
Err(_) => EncodeRequest::Invalid,
303+
},
304+
None => EncodeRequest::Invalid,
305+
}
306+
}
307+
308+
/// What `edge_decode` should do with a Val u64. `Primitive` returns the
309+
/// bytes ready to copy into the guest's buffer; `Heap` defers to the
310+
/// host (the value is heap-resident — the host materializes its bytes,
311+
/// e.g. UTF-8 for a `str`); `Invalid` is a malformed Val.
312+
pub enum DecodeBits {
313+
Primitive { tag: u32, bytes: PrimitiveBytes },
314+
Heap,
315+
Invalid,
316+
}
317+
318+
/// Inline byte payload of a decoded primitive: nothing, a single byte,
/// or eight bytes.
pub enum PrimitiveBytes {
    None,
    Bool(u8),
    Eight([u8; 8]),
}

impl PrimitiveBytes {
    /// View the payload as a byte slice: empty for `None`, one byte for
    /// `Bool`, eight bytes for `Eight`.
    pub fn as_slice(&self) -> &[u8] {
        match *self {
            PrimitiveBytes::None => &[],
            PrimitiveBytes::Bool(ref flag) => core::slice::from_ref(flag),
            PrimitiveBytes::Eight(ref raw) => &raw[..],
        }
    }
}
333+
334+
/// Inspect raw Val bits to extract the primitive kind. Returns
335+
/// `DecodeBits::Heap` for QNAN-tagged heap handles — the host must
336+
/// consult its own heap to materialize the value (e.g. read the bytes
337+
/// of a `Str` from `HeapPool`).
338+
pub fn classify_decode(val_bits: u64) -> DecodeBits {
339+
/* Same NaN-boxing constants as `classify_encode`. */
340+
const QNAN: u64 = 0x7FFC_0000_0000_0000;
341+
const SIGN: u64 = 0x8000_0000_0000_0000;
342+
const TAG_INT: u64 = QNAN | SIGN;
343+
344+
// Float: any pattern that ISN'T QNAN-tagged.
345+
if (val_bits & QNAN) != QNAN {
346+
return DecodeBits::Primitive {
347+
tag: Tag::Float as u32,
348+
bytes: PrimitiveBytes::Eight(f64::from_bits(val_bits).to_le_bytes()),
349+
};
350+
}
351+
// Int: QNAN | SIGN with payload.
352+
if (val_bits & (QNAN | SIGN)) == TAG_INT {
353+
let raw = (val_bits & 0x0000_FFFF_FFFF_FFFF) as i64;
354+
let sign_extended = (raw << 16) >> 16;
355+
return DecodeBits::Primitive {
356+
tag: Tag::Int as u32,
357+
bytes: PrimitiveBytes::Eight(sign_extended.to_le_bytes()),
358+
};
359+
}
360+
// Singletons (None / True / False) and heap handles.
361+
let lower = val_bits & 0xF;
362+
if (val_bits & QNAN) == QNAN && (val_bits & SIGN) == 0 {
363+
if val_bits == QNAN | 1 {
364+
return DecodeBits::Primitive {
365+
tag: Tag::None as u32, bytes: PrimitiveBytes::None,
366+
};
367+
}
368+
if val_bits == QNAN | 2 {
369+
return DecodeBits::Primitive {
370+
tag: Tag::Bool as u32, bytes: PrimitiveBytes::Bool(1),
371+
};
372+
}
373+
if val_bits == QNAN | 3 {
374+
return DecodeBits::Primitive {
375+
tag: Tag::Bool as u32, bytes: PrimitiveBytes::Bool(0),
376+
};
377+
}
378+
if lower >= 4 {
379+
return DecodeBits::Heap;
380+
}
381+
}
382+
DecodeBits::Invalid
383+
}

0 commit comments

Comments
 (0)