AssemblyScript介绍2

AssemblyScript介绍2

上一篇文章从整体上讨论了AssemblyScript(后文简称AS)程序如何被编译成WebAssembly(后文简称Wasm)模块,详细介绍了AS语言各种要素如何映射到Wasm二进制模块的各个段。这一篇文章将调整焦距,把焦点对准函数。我们将讨论AS编译器如何使用Wasm指令集来实现各种语法要素。在开始之前,我们先简单回顾一下Wasm指令集(关于Wasm模块和指令集的详细介绍可以参考之前的系列文章):

Wasm采用栈式虚拟机(Stack Based Virtual Machine)以及字节码(Bytecode),其指令可以分为五大类:

  • 控制指令(Control Instructions),包括结构化控制指令、跳转指令、函数调用指令等。
  • 参数指令(Parametric Instructions),只有两条:drop和select。
  • 变量指令(Variable Instructions),包括局部变量指令和全局变量指令。
  • 内存指令(Memory Instructions),包括存储指令、加载指令等。
  • 数值指令(Numeric Instructions),包括常量指令、测试指令、比较指令、一元运算指令、二元运算指令、类型转换指令。

接下来将结合实例代码详细介绍AS编译器如何利用这五类指令。为了便于测试,后文给出的部分示例代码调用了外部函数。这些外部函数只是为了配合示例代码,因此它们的实现并不重要。下面统一给出这些外部函数的声明:

// External functions used by the sample code in this article. Only their
// declarations are given; the implementations live outside the AS module.
declare function printI32(n: i32): void;
declare function printI64(n: i64): void;
declare function printF32(n: f32): void;
declare function printF64(n: f64): void;
declare function randomI32(): i32;

控制指令

如前所述,Wasm控制指令包括结构化控制指令(block、loop、if-else)、跳转指令(br、br_if、br_table、return)、函数调用指令(call、call_indirect),以及nop和unreachable。其中结构化控制指令和跳转指令配合可以实现AS语言的各种控制语句,例如if-else语句、for循环语句、switch-case语句等。call指令可以实现AS函数调用,call_indirect指令则可以支持一等函数(First-class Function)。

AS语言的if-else语句可以直接使用Wasm的if-else指令实现,下面是一个例子:

// Prints 1 when n is even and 0 otherwise, using the external printI32.
export function printEven(n: i32): void {
  if (n  % 2 == 0) {
    printI32(1);
  } else {
    printI32(0);
  }
}

下面是编译结果(已经将编译后的函数字节码反编译为WAT,后文不再赘述):

;; Compiled form of printEven. The remainder itself is used as the `if`
;; condition, so the branches are swapped relative to the AS source:
;; non-zero remainder (odd) prints 0, zero remainder (even) prints 1.
(func $printEven (type 0) (param i32)
  (if
    (i32.rem_s (local.get 0) (i32.const 2))
    (then (call $printI32 (i32.const 0)))
    (else (call $printI32 (i32.const 1)))
  )
)

上面例子也展示了call指令的用法,后面就不再单独介绍了。顺便说一下,一些简单的if-else语句会被AS编译器优化为select指令,下面是一个例子:

// Returns the larger of the two signed 32-bit integers a and b.
export function max(a: i32, b: i32): i32 {
  if (a > b) {
    return a;
  } else {
    return b;
  }
}

下面是编译结果:

;; Compiled form of max. `select` yields its first operand (a) when the
;; condition (a > b, signed) is non-zero, otherwise its second operand (b).
(func $max (type 2) (param i32 i32) (result i32)
  (select
    (local.get 0)
    (local.get 1)
    (i32.gt_s (local.get 0) (local.get 1))
  )
)

AS语言的for、while、do-while等循环语句可以用Wasm的loop指令实现。注意loop指令并不能自动形成循环,所以必须要和br、br_if、br_table等跳转指令一起使用。下面来看一个稍微复杂一点的例子:

// Prints 0, 1, 2, ... while i < n, stopping early right after printing 100.
export function printNums(n: i32): void {
  for (let i: i32 = 0; i < n; i++) {
    printI32(i);
    if (i == 100) {
      break;
    }
  }
}

这个例子展示了loop、block、br、br_if指令的用法,下面是编译结果:

;; Compiled form of printNums: param 0 is n, local 1 is the loop counter i.
(func $printNums (type 0) (param i32)
  (local i32)
  (loop  ;; label = @1
    (if  ;; label = @2
      (i32.lt_s (local.get 1) (local.get 0))
      (then
        (block  ;; label = @3
          (call $printI32 (local.get 1))
          ;; `break`: leave block @3 when i == 100; nothing after the block
          ;; branches back, so control also falls out of the loop.
          (br_if 0 (;@3;)
            (i32.eq (local.get 1) (i32.const 100)))
          (local.set 1
            (i32.add (local.get 1) (i32.const 1)))
          ;; Branching to the loop label jumps back to its start (next iteration).
          (br 2 (;@1;))
        ) ;; end of block
      ) ;; end of then
    ) ;; end of if
  ) ;; end of loop
)

AS语言的switch-case语句可以用Wasm的br_table指令来实现,下面是一个例子:

// Returns n * 100; the cases 1, 2 and 3 are spelled out as constants so the
// switch statement has several distinct targets.
export function mul100(n: i32): i32 {
  switch (n) {
    case 1: return 100;
    case 2: return 200;
    case 3: return 300;
    default: return n * 100;
  }
}

除了br_table指令,这个例子还展示了return指令的用法,下面是编译结果:

;; Compiled form of mul100: br_table dispatches on n - 1.
;; Indexes 0, 1, 2 exit blocks @4, @3, @2 and land on the matching `return`;
;; any other index takes the last (default) target @1 and falls through to
;; the final n * 100.
(func $mul100 (type 1) (param i32) (result i32)
  (block  ;; label = @1
    (block  ;; label = @2
      (block  ;; label = @3
        (block  ;; label = @4
          (br_table 0 (;@4;) 1 (;@3;) 2 (;@2;) 3 (;@1;)
            (i32.sub (local.get 0) (i32.const 1))))
        (return (i32.const 100)))
      (return (i32.const 200)))
    (return (i32.const 300)))
  (i32.mul (local.get 0) (i32.const 100))
)

AS语言里的一等函数,和C/C++等语言中函数指针概念比较类似,可以用call_indirect指令实现。下面来看一个例子:

// Function type shared by the four arithmetic helpers below.
type OP = (a: i32, b: i32) => i32;

// Binary i32 operations that getOp can hand out as first-class functions.
function add(a: i32, b: i32): i32 { return a + b; }
function sub(a: i32, b: i32): i32 { return a - b; }
function mul(a: i32, b: i32): i32 { return a * b; }
function div(a: i32, b: i32): i32 { return a / b; }

// Looks up the operation selected by `op` and applies it to a and b.
export function calc(a: i32, b: i32, op: i32): i32 {
  return getOp(op)(a, b);
}

// Maps an operation code (1..4) to add/sub/mul/div; any other code yields add.
function getOp(op: i32): OP {
  switch (op) {
    case 1: return add;
    case 2: return sub;
    case 3: return mul;
    case 4: return div;
    default: return add;
  }
}

下面是编译结果,请注意观察table和elem段的内容,以及calc()函数字节码:

;; Compiled module for the calc example.
(module
  (type (;0;) (func (param i32 i32) (result i32)))
  (type (;1;) (func (param i32) (result i32)))
  (type (;2;) (func (param i32 i32 i32) (result i32)))
  (func $add (type 0) (i32.add (local.get 0) (local.get 1)))
  (func $sub (type 0) (i32.sub (local.get 0) (local.get 1)))
  (func $mul (type 0) (i32.mul (local.get 0) (local.get 1)))
  (func $div (type 0) (i32.div_s (local.get 0) (local.get 1)))
  (func $getOp (type 1) (param i32) (result i32) (;; omitted ;;))
  (func $calc (type 2) (param i32 i32 i32) (result i32)
    ;; The last operand (the value returned by $getOp) is the table index of
    ;; the function to call; the first two operands are its arguments.
    (call_indirect (type 0)
      (local.get 0)
      (local.get 1)
      (call $getOp (local.get 2))
    )
  )
  (table (;0;) 5 funcref)
  (memory (;0;) 0)
  (export "memory" (memory 0))
  (export "calc" (func $calc))
  ;; NOTE(review): getOp is not marked `export` in the AS source shown in the
  ;; article; confirm whether this export line belongs in the listing.
  (export "getOp" (func $getOp))
  ;; Fills table slots 1..4 with $add, $sub, $mul, $div.
  (elem (;0;) (i32.const 1) func $add $sub $mul $div)
)

最后让我们来看看unreachable指令。AS计划在Wasm异常处理提案通过后再支持异常处理,目前抛出异常会导致abort()函数被调用。我们可以通过添加编译器选项--use abort=来禁用abort,这样编译器就会将abort()函数调用替换为一条unreachable指令。除此之外,我们也可以通过直接调用低级的unreachable()函数来显式插入一条unreachable指令,下面是一个例子:

// Calls the low-level unreachable() built-in so that an `unreachable`
// instruction is emitted explicitly.
export function crash2(): void {
  unreachable();
}

编译结果也很简单:

;; Compiled form of crash2: the body is a single `unreachable` instruction.
(func $crash2 (type 1)
  (unreachable)
)

参数指令

参数指令较为简单,只有drop和select两条。其中select指令在前面介绍if-else语句时已经提到过了,这里就不再单独介绍了。drop指令可以将操作数栈顶多余的操作数弹出扔掉,下面来看一个简单的例子:

// Calls randomI32() and ignores its result, which makes the compiler emit `drop`.
export function dropRandom(): void {
  randomI32();
}

编译结果也很简单:

;; Compiled form of dropRandom: the unused i32 result of $randomI32 is dropped.
(func $dropRandom (type 0)
  (drop (call $randomI32))
)

变量指令

局部变量指令共三条:local.get、local.set、local.tee。如果不考虑优化,每个AS函数都可以被编译器编译成一个Wasm函数。函数参数和局部变量的读写操作可以通过局部变量指令来完成,下面来看一个例子:

// Stores a + b in the local variable c and returns it.
export function addLocals(a: i32, b: i32): i32 {
  let c: i32 = a + b;
  return c;
}

下面是编译结果(为了便于观察结果,在编译部分示例代码时关闭了编译器优化,后文不再赘述):

;; Compiled form of addLocals: params 0 and 1 are a and b, local 2 is c.
(func $addLocals (type 1) (param i32 i32) (result i32)
  (local i32)
  (local.set 2 (i32.add (local.get 0) (local.get 1)))
  (local.get 2)
)

全局变量指令只有两条:global.get和global.set。AS语言的全局变量可以直接用Wasm全局变量来实现,全局变量的读写操作可以通过全局变量指令来完成,下面来看一个例子:

// Module-level variables read and written by addGlobals.
let a: i32;
let b: i32;
let c: i32;

// Writes the sum of the globals a and b into the global c.
export function addGlobals(): void {
  c = a + b;
}

下面是完整的编译结果:

;; Compiled module for addGlobals: globals 0, 1, 2 are the mutable i32
;; variables a, b, c, each initialised to 0.
(module
  (type (;0;) (func))
  (func $addGlobals (type 0)
    (global.set 2 (i32.add (global.get 0) (global.get 1)))
  )
  (global (;0;) (mut i32) (i32.const 0))
  (global (;1;) (mut i32) (i32.const 0))
  (global (;2;) (mut i32) (i32.const 0))
  (export "addGlobals" (func $addGlobals))
)

内存指令

Wasm虚拟机可以附带一块虚拟内存,并且提供了丰富的指令来操作这块内存。其中load系列指令可以从内存加载数据,放入操作数栈。store系列指令可以从操作数栈拿出数据,存入内存。此外,通过memory.size指令可以获取内存的当前页数,通过memory.grow指令可以按页扩展内存。我们将通过一个简单的结构体来帮助我们观察内存指令的使用,下面是这个结构体的定义:

// Sample class with one field of each integer and float width, used to
// observe which load/store instruction the compiler picks per field type.
// Field offsets seen in the compiled listings of this article:
// a=0, b=1, c=2, d=4, e=8, f=12, g=16, h=24, i=32, j=40.
class S {
  a: i8; b: u8; c: i16; d: u16; e: i32; f: u32; g: i64; h: u64;
  i: f32; j: f64;
}

下面这个函数展示了i32类型load指令的用法:

// Reads every integer field of S that is at most 32 bits wide and prints it
// as an i32; the trailing comments name the expected load instruction.
export function loadI32(s: S): void {
  printI32(s.a as i32); // i32.load8_s
  printI32(s.b as i32); // i32.load8_u
  printI32(s.c as i32); // i32.load16_s
  printI32(s.d as i32); // i32.load16_u
  printI32(s.e as i32); // i32.load
  printI32(s.f as i32); // i32.load
}

下面是编译结果。通过load指令的offset立即数可以看出,AS编译器并没有对结构体字段进行重新排列,但是进行了适当的对齐。

;; Compiled form of loadI32: param 0 is the address of the S instance and each
;; field is read with the field's byte offset as the load's `offset` immediate.
(func $loadI32 (type 0) (param i32)
  (call $printI32 (i32.load8_s            (local.get 0)))
  (call $printI32 (i32.load8_u  offset=1  (local.get 0)))
  (call $printI32 (i32.load16_s offset=2  (local.get 0)))
  (call $printI32 (i32.load16_u offset=4  (local.get 0)))
  (call $printI32 (i32.load     offset=8  (local.get 0)))
  (call $printI32 (i32.load     offset=12 (local.get 0)))
)

下面这个函数展示了i64类型load指令的用法:

// Reads every integer field of S and prints it as an i64. Comments ending in
// `?` are the instructions one might expect; the compiled listing shows what
// is actually emitted.
export function loadI64(s: S): void {
  printI64(s.a as i64); // i64.load8_s?
  printI64(s.b as i64); // i64.load8_u?
  printI64(s.c as i64); // i64.load16_s?
  printI64(s.d as i64); // i64.load16_u?
  printI64(s.e as i64); // i64.load32_s?
  printI64(s.f as i64); // i64.load32_u?
  printI64(s.g as i64); // i64.load
  printI64(s.h as i64); // i64.load
}

下面是编译结果。可以看到,预期使用i64类型load指令的地方,AS编译器使用了i32类型load指令并通过extend指令进行整数拉升。

;; Compiled form of loadI64: fields narrower than 64 bits are read with an i32
;; load and then widened with i64.extend_i32_s / i64.extend_i32_u; only the
;; two 64-bit fields use i64.load directly.
(func $loadI64 (type 0) (param i32)
  (call $printI64 (i64.extend_i32_s (i32.load8_s            (local.get 0))))
  (call $printI64 (i64.extend_i32_u (i32.load8_u  offset=1  (local.get 0))))
  (call $printI64 (i64.extend_i32_s (i32.load16_s offset=2  (local.get 0))))
  (call $printI64 (i64.extend_i32_u (i32.load16_u offset=4  (local.get 0))))
  (call $printI64 (i64.extend_i32_s (i32.load     offset=8  (local.get 0))))
  (call $printI64 (i64.extend_i32_u (i32.load     offset=12 (local.get 0))))
  (call $printI64 (i64.load offset=16 (local.get 0)))
  (call $printI64 (i64.load offset=24 (local.get 0)))
)

下面这个函数展示了float类型load指令的用法:

// Reads the two floating-point fields of S and prints them.
export function loadF(s: S): void {
  printF32(s.i); // f32.load
  printF64(s.j); // f64.load
}

下面是编译结果:

;; Compiled form of loadF: the f32 field is read at offset 32 and the f64
;; field at offset 40 from the address in param 0.
(func $loadF (type 0) (param i32)
  (call $printF32 (f32.load offset=32 (local.get 0)))
  (call $printF64 (f64.load offset=40 (local.get 0)))
)

相比load指令,store指令较为简单。下面的例子展示了store指令的用法:

// Writes v, converted to each field's type, into every field of s; the
// trailing comments name the expected store instruction.
export function store(s: S, v: i64): void {
  s.a = v as i8;  // i32.store8
  s.b = v as u8;  // i32.store8
  s.c = v as i16; // i32.store16
  s.d = v as u16; // i32.store16
  s.e = v as i32; // i32.store
  s.f = v as u32; // i32.store
  s.g = v as i64; // i64.store
  s.h = v as u64; // i64.store
  s.i = v as f32; // f32.store
  s.j = v as f64; // f64.store
}

下面是编译结果:

;; Compiled form of store: param 0 is the address of s, param 1 is v.
;; Integer fields up to 32 bits store i32.wrap_i64(v); the float fields store
;; v converted with f32/f64.convert_i64_s.
(func $store (type 1) (param i32 i64)
  (i32.store8            (local.get 0) (i32.wrap_i64 (local.get 1)))
  (i32.store8  offset=1  (local.get 0) (i32.wrap_i64 (local.get 1)))
  (i32.store16 offset=2  (local.get 0) (i32.wrap_i64 (local.get 1)))
  (i32.store16 offset=4  (local.get 0) (i32.wrap_i64 (local.get 1)))
  (i32.store   offset=8  (local.get 0) (i32.wrap_i64 (local.get 1)))
  (i32.store   offset=12 (local.get 0) (i32.wrap_i64 (local.get 1)))
  (i64.store   offset=16 (local.get 0) (local.get 1))
  (i64.store   offset=24 (local.get 0) (local.get 1))
  (f32.store   offset=32 (local.get 0) (f32.convert_i64_s (local.get 1)))
  (f64.store   offset=40 (local.get 0) (f64.convert_i64_s (local.get 1)))
)

和前面介绍过的unreachable指令一样,memory.size和memory.grow指令也可以通过内置函数来生成,下面是一个简单的例子:

// Prints the result of memory.size(), then the result of memory.grow(n).
export function sizeAndGrow(n: i32): void {
  printI32(memory.size());
  printI32(memory.grow(n));
}

下面是编译结果:

;; Compiled form of sizeAndGrow: the built-in calls map one-to-one onto the
;; memory.size and memory.grow instructions.
(func $sizeAndGrow (type 0) (param i32)
  (call $printI32 (memory.size))
  (call $printI32 (memory.grow (local.get 0)))
)

数值指令

如前文所述,数值指令又可以分为常量指令、测试指令、比较指令、一元和二元运算指令,以及类型转换指令。其中常量指令共四条,AS语言里的数值字面量(Literals)可以用常量指令实现,下面是一个例子:

// Prints one numeric literal of each Wasm value type; the trailing comments
// name the constant instruction each literal is expected to compile to.
export function consts(): void {
  printI32(1234); // i32.const
  printI64(5678); // i64.const
  printF32(3.14); // f32.const
  printF64(2.71); // f64.const
}

下面是编译结果:

;; Compiled form of consts: each literal becomes a const instruction of the
;; matching type. Float constants are printed in hexadecimal notation, with
;; the decimal value kept in the trailing block comment.
(func $consts (type 1)
  (call $printI32 (i32.const 1234))
  (call $printI64 (i64.const 5678))
  (call $printF32 (f32.const 0x1.91eb86p+1 (;=3.14;)))
  (call $printF64 (f64.const 0x1.5ae147ae147aep+1 (;=2.71;)))
)

测试指令只有两条:i32.eqz和i64.eqz。下面的例子展示了i32.eqz指令的用法:

// Prints 123 when a is zero; the comparison with 0 is expected to compile to i32.eqz.
export function testOps(a: i32): void {
  if (a == 0) { // i32.eqz
    printI32(123);
  }
}

下面是编译结果:

;; Compiled form of testOps: `a == 0` is emitted as i32.eqz on param 0.
(func $testOps (type 0) (param i32)
  (if (i32.eqz (local.get 0))
    (then (call $printI32 (i32.const 123)))
  )
)

AS语言支持的关系运算符可以用比较指令实现,下面的例子展示了i32类型比较指令的用法:

// Exercises every relational operator on i32 values. a and b are signed,
// c and d are unsigned, so ordered comparisons appear in both _s and _u form.
// Each comparison that holds prints its own marker number (0..9).
export function relOps(a: i32, b: i32, c: u32, d:  u32): void {
  if (a == b) { printI32(0); } // i32.eq
  if (a != b) { printI32(1); } // i32.ne
  if (a <  b) { printI32(2); } // i32.lt_s
  if (c <  d) { printI32(3); } // i32.lt_u
  if (a >  b) { printI32(4); } // i32.gt_s
  if (c >  d) { printI32(5); } // i32.gt_u
  if (a <= b) { printI32(6); } // i32.le_s
  if (c <= d) { printI32(7); } // i32.le_u
  if (a >= b) { printI32(8); } // i32.ge_s
  if (c >= d) { printI32(9); } // i32.ge_u
}

下面是编译结果:

;; Compiled form of relOps: params 0-1 are the signed a, b and params 2-3 are
;; the unsigned c, d. Signed operands use the _s comparisons, unsigned ones _u.
(func $relOps (type 2) (param i32 i32 i32 i32)
  (if (i32.eq (local.get 0) (local.get 1))
    (then (call $printI32 (i32.const 0))))
  (if (i32.ne (local.get 0) (local.get 1))
    (then (call $printI32 (i32.const 1))))
  (if (i32.lt_s (local.get 0) (local.get 1))
    (then (call $printI32 (i32.const 2))))
  (if (i32.lt_u (local.get 2) (local.get 3))
    (then (call $printI32 (i32.const 3))))
  (if (i32.gt_s (local.get 0) (local.get 1))
    (then (call $printI32 (i32.const 4))))
  (if (i32.gt_u (local.get 2) (local.get 3))
    (then (call $printI32 (i32.const 5))))
  (if (i32.le_s (local.get 0) (local.get 1))
    (then (call $printI32 (i32.const 6))))
  (if (i32.le_u (local.get 2) (local.get 3))
    (then (call $printI32 (i32.const 7))))
  (if (i32.ge_s (local.get 0) (local.get 1))
    (then (call $printI32 (i32.const 8))))
  (if (i32.ge_u (local.get 2) (local.get 3))
    (then (call $printI32 (i32.const 9))))
)

除了浮点数取反运算以外,其他一元运算指令并没有直接被AS编译器使用,但是可以通过内置函数生成。下面的例子展示了i32和f32类型一元运算指令的用法:

// Exercises unary instructions: the i32 ones on a and the f32 ones on b.
// Everything except the negation `-b` goes through a generic built-in function.
export function unOps(a: i32, b: f32): void {
  printI32(clz<i32>(a));     // i32.clz
  printI32(ctz<i32>(a));     // i32.ctz
  printI32(popcnt<i32>(a));  // i32.popcnt
  printF32(abs<f32>(b));     // f32.abs
  printF32(-b);              // f32.neg
  printF32(sqrt<f32>(b));    // f32.sqrt
  printF32(floor<f32>(b));   // f32.floor
  printF32(trunc<f32>(b));   // f32.trunc
  printF32(nearest<f32>(b)); // f32.nearest
}

下面是编译结果:

;; Compiled form of unOps: param 0 is a (i32) and param 1 is b (f32), matching
;; the two-parameter AS signature.
(func $unOps (type 3) (param i32 f32)
  (call $printI32 (i32.clz     (local.get 0)))
  (call $printI32 (i32.ctz     (local.get 0)))
  (call $printI32 (i32.popcnt  (local.get 0)))
  (call $printF32 (f32.abs     (local.get 1)))
  (call $printF32 (f32.neg     (local.get 1)))
  (call $printF32 (f32.sqrt    (local.get 1)))
  (call $printF32 (f32.floor   (local.get 1)))
  (call $printF32 (f32.trunc   (local.get 1)))
  (call $printF32 (f32.nearest (local.get 1)))
)

AS语言支持的二元运算符可以用二元运算指令实现,下面的例子展示了i32类型二元运算指令的用法:

// Exercises the i32 binary operators. a and b are signed, c and d are
// unsigned, so division and remainder appear in both _s and _u form.
// The parameters e and f are not used in the body. Rotation goes through
// the rotl/rotr built-ins.
export function binOps(a: i32, b: i32, c: u32, d: u32, e: f32, f: f32): void {
  printI32(a + b);           // i32.add
  printI32(a - b);           // i32.sub
  printI32(a * b);           // i32.mul
  printI32(a / b);           // i32.div_s
  printI32(c / d);           // i32.div_u
  printI32(a % b);           // i32.rem_s
  printI32(c % d);           // i32.rem_u
  printI32(a & b);           // i32.and
  printI32(a | b);           // i32.or
  printI32(a ^ b);           // i32.xor
  printI32(a << b);          // i32.shl
  printI32(a >> b);          // i32.shr_s
  printI32(a >>> b);         // i32.shr_u
  printI32(rotl<i32>(a, b)); // i32.rotl
  printI32(rotr<i32>(a, b)); // i32.rotr
}

由于AS语言没有“循环位移”运算符,所以我们只能通过内置函数来生成循环位移指令。下面是编译结果:

;; Compiled form of binOps: params 0-1 are the signed a, b and params 2-3 are
;; the unsigned c, d, so c / d and c % d use the unsigned div_u / rem_u
;; variants. Params 4-5 (e, f) are not used.
(func $binOps (type 3) (param i32 i32 i32 i32 f32 f32)
  (call $printI32 (i32.add      (local.get 0) (local.get 1)))
  (call $printI32 (i32.sub      (local.get 0) (local.get 1)))
  (call $printI32 (i32.mul      (local.get 0) (local.get 1)))
  (call $printI32 (i32.div_s    (local.get 0) (local.get 1)))
  (call $printI32 (i32.div_u    (local.get 2) (local.get 3)))
  (call $printI32 (i32.rem_s    (local.get 0) (local.get 1)))
  (call $printI32 (i32.rem_u    (local.get 2) (local.get 3)))
  (call $printI32 (i32.and      (local.get 0) (local.get 1)))
  (call $printI32 (i32.or       (local.get 0) (local.get 1)))
  (call $printI32 (i32.xor      (local.get 0) (local.get 1)))
  (call $printI32 (i32.shl      (local.get 0) (local.get 1)))
  (call $printI32 (i32.shr_s    (local.get 0) (local.get 1)))
  (call $printI32 (i32.shr_u    (local.get 0) (local.get 1)))
  (call $printI32 (i32.rotl     (local.get 0) (local.get 1)))
  (call $printI32 (i32.rotr     (local.get 0) (local.get 1)))
)

AS语言中的类型转换操作可以通过类型转换指令实现,下面是一个例子:

// Exercises the type-conversion instructions. Most conversions are written
// with `as`, a few are implicit (printI64(a), printF64(e)), and bit-level
// reinterpretation uses the reinterpret<T>() built-in. The trailing comments
// name the expected instruction.
export function cvtOps(a: i32, b: i64, c: u32, d: u64, e: f32, f: f64): void {
  printI32(b as i32); // i32.wrap_i64
  printI32(e as i32); // i32.trunc_f32_s
  printI32(e as u32); // i32.trunc_f32_u
  printI32(f as i32); // i32.trunc_f64_s
  printI32(f as u32); // i32.trunc_f64_u
  printI64(a);        // i64.extend_i32_s
  printI64(a as u32); // i64.extend_i32_u
  printI64(e as i64); // i64.trunc_f32_s
  printI64(e as u64); // i64.trunc_f32_u
  printI64(f as i64); // i64.trunc_f64_s
  printI64(f as u64); // i64.trunc_f64_u
  printF32(a as f32); // f32.convert_i32_s
  printF32(c as f32); // f32.convert_i32_u
  printF32(b as f32); // f32.convert_i64_s
  printF32(d as f32); // f32.convert_i64_u
  printF32(f as f32); // f32.demote_f64
  printF64(a as f64); // f64.convert_i32_s
  printF64(c as f64); // f64.convert_i32_u
  printF64(b as f64); // f64.convert_i64_s
  printF64(d as f64); // f64.convert_i64_u
  printF64(e);        // f64.promote_f32
  printI32(reinterpret<i32>(e)); // i32.reinterpret_f32
  printI64(reinterpret<i64>(f)); // i64.reinterpret_f64
  printF32(reinterpret<f32>(a)); // f32.reinterpret_i32
  printF64(reinterpret<f64>(b)); // f64.reinterpret_i64
}

下面是编译结果:

;; Compiled form of cvtOps. Params in order: 0 = a (i32), 1 = b (i64),
;; 2 = c (u32, as i32), 3 = d (u64, as i64), 4 = e (f32), 5 = f (f64).
;; Each AS conversion maps onto exactly one conversion instruction.
(func $cvtOps (type 4) (param i32 i64 i32 i64 f32 f64)
  (call $printI32 (i32.wrap_i64        (local.get 1)))
  (call $printI32 (i32.trunc_f32_s     (local.get 4)))
  (call $printI32 (i32.trunc_f32_u     (local.get 4)))
  (call $printI32 (i32.trunc_f64_s     (local.get 5)))
  (call $printI32 (i32.trunc_f64_u     (local.get 5)))
  (call $printI64 (i64.extend_i32_s    (local.get 0)))
  (call $printI64 (i64.extend_i32_u    (local.get 0)))
  (call $printI64 (i64.trunc_f32_s     (local.get 4)))
  (call $printI64 (i64.trunc_f32_u     (local.get 4)))
  (call $printI64 (i64.trunc_f64_s     (local.get 5)))
  (call $printI64 (i64.trunc_f64_u     (local.get 5)))
  (call $printF32 (f32.convert_i32_s   (local.get 0)))
  (call $printF32 (f32.convert_i32_u   (local.get 2)))
  (call $printF32 (f32.convert_i64_s   (local.get 1)))
  (call $printF32 (f32.convert_i64_u   (local.get 3)))
  (call $printF32 (f32.demote_f64      (local.get 5)))
  (call $printF64 (f64.convert_i32_s   (local.get 0)))
  (call $printF64 (f64.convert_i32_u   (local.get 2)))
  (call $printF64 (f64.convert_i64_s   (local.get 1)))
  (call $printF64 (f64.convert_i64_u   (local.get 3)))
  (call $printF64 (f64.promote_f32     (local.get 4)))
  (call $printI32 (i32.reinterpret_f32 (local.get 4)))
  (call $printI64 (i64.reinterpret_f64 (local.get 5)))
  (call $printF32 (f32.reinterpret_i32 (local.get 0)))
  (call $printF64 (f64.reinterpret_i64 (local.get 1)))
)

总结

本文讨论了AS编译器如何通过各种Wasm指令来实现AS语法要素,简单来说:各种控制结构通过控制指令来实现、局部变量和全局变量的读写通过变量指令来实现、内存操作通过内存指令来实现、各种运算符和类型转换通过数值指令来实现。在后面的文章中,我们还将深入讨论AS如何实现面向对象编程和自动内存管理。

本文由CoinEx Chain团队Chase写作,转载无需授权。