Bug 1648885 and Bug 1649432: vendor latest Cranelift to get Spectre mitigations and fix fuzzbug. r=bbouvier

This patch pulls in Cranelift revision
47a218f908e6bdeb7a0fb65ed74e58a0b608080d, which incorporates several
relevant changes:

- It includes the Spectre mitigation for explicit heap bounds checks
  merged in PR bytecodealliance/wasmtime#1930, resolving Bug 1648885
  (a conceptual sketch of the mitigation follows this list).

- It includes the fix for an out-of-bounds subtraction on large shift
  amounts merged in PR bytecodealliance/wasmtime#1954, resolving Bug
  1649432.
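
For context on the first bullet: the upstream mitigation (visible further
down in this diff as the new `selectif_spectre_guard` instruction and the
`enable_heap_access_spectre_mitigation` setting) routes the accessed address
through a branchless conditional select keyed on the same condition as the
bounds check, so a speculated out-of-bounds access reads a harmless address
instead. A rough sketch of the idea in plain Rust, with hypothetical names
and no Cranelift API:

```rust
/// Conceptual sketch only (hypothetical helper, not the Cranelift code).
/// The address actually used by the load is chosen with a branchless
/// conditional move, so even if the CPU speculates past the bounds-check
/// branch, the speculative load targets a harmless "safe" address instead
/// of out-of-bounds memory.
fn guarded_heap_addr(base: usize, index: usize, bound: usize) -> usize {
    let oob = index >= bound; // same condition the bounds-check branch uses
    let real = base.wrapping_add(index);
    let safe = 0usize; // e.g. a null/guard address that faults when accessed
    // A real implementation must guarantee a cmov-style lowering here; that
    // is exactly what `selectif_spectre_guard` provides at the IR level.
    if oob { safe } else { real }
}

fn main() {
    assert_eq!(guarded_heap_addr(0x1000, 8, 16), 0x1008);
    assert_eq!(guarded_heap_addr(0x1000, 32, 16), 0);
}
```

The key property is that the select must always lower to a conditional move
and must never be rewritten by later passes, which is why the mitigation
introduces a dedicated, optimization-opaque instruction rather than reusing
plain `selectif`.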

We need to temporarily disable the `wasm/limits.js` jit-test on
Cranelift configurations because it now needs shared memory to work,
and the Cranelift backend does not support shared memory yet. Since
that support should land within the next month at most (it requires
atomics support on AArch64, which is currently being investigated), it
seems simpler to temporarily skip the test on Cranelift configurations
than to disentangle the parts of the test that depend on shared memory.

This patch also edits the `regexp/bug1445907.js` jit-test to run only
if Wasm debugging is supported. This is needed so that the test does
not fail with `--wasm-compiler=cranelift`, which disables Baseline, the
only Wasm compiler that supports debugging.

Differential Revision: https://phabricator.services.mozilla.com/D81936
Chris Fallin 2020-07-02 15:47:56 +00:00
parent bfc3fb787d
commit 35cf81d389
83 changed files with 6679 additions and 2707 deletions


@ -65,7 +65,7 @@ rev = "3224e2dee65c0726c448484d4c3c43956b9330ec"
[source."https://github.com/bytecodealliance/wasmtime"]
git = "https://github.com/bytecodealliance/wasmtime"
replace-with = "vendored-sources"
rev = "238ae3bf2111847f60089656eb97fc9345295b1f"
rev = "47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
[source."https://github.com/badboy/failure"]
git = "https://github.com/badboy/failure"

Cargo.lock generated

@ -734,7 +734,7 @@ dependencies = [
[[package]]
name = "cranelift-bforest"
version = "0.65.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=238ae3bf2111847f60089656eb97fc9345295b1f#238ae3bf2111847f60089656eb97fc9345295b1f"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
dependencies = [
"cranelift-entity 0.65.0",
]
@ -742,7 +742,7 @@ dependencies = [
[[package]]
name = "cranelift-codegen"
version = "0.65.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=238ae3bf2111847f60089656eb97fc9345295b1f#238ae3bf2111847f60089656eb97fc9345295b1f"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
dependencies = [
"byteorder",
"cranelift-bforest",
@ -759,7 +759,7 @@ dependencies = [
[[package]]
name = "cranelift-codegen-meta"
version = "0.65.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=238ae3bf2111847f60089656eb97fc9345295b1f#238ae3bf2111847f60089656eb97fc9345295b1f"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
dependencies = [
"cranelift-codegen-shared",
"cranelift-entity 0.65.0",
@ -768,7 +768,7 @@ dependencies = [
[[package]]
name = "cranelift-codegen-shared"
version = "0.65.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=238ae3bf2111847f60089656eb97fc9345295b1f#238ae3bf2111847f60089656eb97fc9345295b1f"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
[[package]]
name = "cranelift-entity"
@ -778,12 +778,12 @@ source = "git+https://github.com/PLSysSec/lucet_sandbox_compiler?rev=5e870faf6f9
[[package]]
name = "cranelift-entity"
version = "0.65.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=238ae3bf2111847f60089656eb97fc9345295b1f#238ae3bf2111847f60089656eb97fc9345295b1f"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
[[package]]
name = "cranelift-frontend"
version = "0.65.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=238ae3bf2111847f60089656eb97fc9345295b1f#238ae3bf2111847f60089656eb97fc9345295b1f"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
dependencies = [
"cranelift-codegen",
"log",
@ -794,14 +794,14 @@ dependencies = [
[[package]]
name = "cranelift-wasm"
version = "0.65.0"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=238ae3bf2111847f60089656eb97fc9345295b1f#238ae3bf2111847f60089656eb97fc9345295b1f"
source = "git+https://github.com/bytecodealliance/wasmtime?rev=47a218f908e6bdeb7a0fb65ed74e58a0b608080d#47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
dependencies = [
"cranelift-codegen",
"cranelift-entity 0.65.0",
"cranelift-frontend",
"log",
"thiserror",
"wasmparser 0.57.0",
"wasmparser 0.58.0",
]
[[package]]
@ -5381,9 +5381,9 @@ checksum = "073da89bf1c84db000dd68ce660c1b4a08e3a2d28fd1e3394ab9e7abdde4a0f8"
[[package]]
name = "wasmparser"
version = "0.57.0"
version = "0.58.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32fddd575d477c6e9702484139cf9f23dcd554b06d185ed0f56c857dd3a47aa6"
checksum = "721a8d79483738d7aef6397edcf8f04cd862640b1ad5973adf5bb50fc10e86db"
[[package]]
name = "wast"


@ -74,8 +74,8 @@ failure_derive = { git = "https://github.com/badboy/failure", rev = "64af847bc5f
[patch.crates-io.cranelift-codegen]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "238ae3bf2111847f60089656eb97fc9345295b1f"
rev = "47a218f908e6bdeb7a0fb65ed74e58a0b608080d"
[patch.crates-io.cranelift-wasm]
git = "https://github.com/bytecodealliance/wasmtime"
rev = "238ae3bf2111847f60089656eb97fc9345295b1f"
rev = "47a218f908e6bdeb7a0fb65ed74e58a0b608080d"


@ -1,4 +1,4 @@
// |jit-test| skip-if: !wasmIsSupported()
// |jit-test| skip-if: !wasmDebuggingIsSupported()
// On ARM64, we failed to save x28 properly when generating code for the regexp
// matcher.


@ -1,3 +1,6 @@
// |jit-test| skip-if: wasmCompilersPresent().match("cranelift")
// (Reason: the Cranelift backend does not support shared memory yet.)
// Tests of limits of memory and table types
const PageSize = 65536;


@ -29,8 +29,8 @@ use cranelift_codegen::ir::InstBuilder;
use cranelift_codegen::isa::{CallConv, TargetFrontendConfig, TargetIsa};
use cranelift_codegen::packed_option::PackedOption;
use cranelift_wasm::{
FuncEnvironment, FuncIndex, GlobalIndex, GlobalVariable, MemoryIndex, ReturnMode,
SignatureIndex, TableIndex, TargetEnvironment, WasmError, WasmResult,
FuncEnvironment, FuncIndex, FunctionBuilder, GlobalIndex, GlobalVariable, MemoryIndex,
ReturnMode, SignatureIndex, TableIndex, TargetEnvironment, WasmError, WasmResult,
};
use crate::bindings::{self, GlobalDesc, SymbolicAddress};
@ -1085,6 +1085,7 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
&mut self,
mut pos: FuncCursor,
table_index: TableIndex,
_table: ir::Table,
delta: ir::Value,
init_value: ir::Value,
) -> WasmResult<ir::Value> {
@ -1096,10 +1097,14 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_get(
&mut self,
mut pos: FuncCursor,
builder: &mut FunctionBuilder,
table_index: TableIndex,
_table: ir::Table,
index: ir::Value,
) -> WasmResult<ir::Value> {
// TODO(bug 1650038): make use of the `FunctionBuilder` here and its
// ability to edit the CFG in order to add a fast-path.
let mut pos = builder.cursor();
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
Ok(self
.instance_call(&mut pos, &FN_TABLE_GET, &[index, table_index])
@ -1108,11 +1113,15 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_table_set(
&mut self,
mut pos: FuncCursor,
builder: &mut FunctionBuilder,
table_index: TableIndex,
_table: ir::Table,
value: ir::Value,
index: ir::Value,
) -> WasmResult<()> {
// TODO(bug 1650038): make use of the `FunctionBuilder` here and its
// ability to edit the CFG in order to add a fast-path.
let mut pos = builder.cursor();
let table_index = pos.ins().iconst(ir::types::I32, table_index.index() as i64);
self.instance_call(&mut pos, &FN_TABLE_SET, &[index, value, table_index]);
Ok(())
@ -1187,9 +1196,9 @@ impl<'static_env, 'module_env> FuncEnvironment for TransEnv<'static_env, 'module
fn translate_ref_func(
&mut self,
mut pos: FuncCursor,
func_index: u32,
func_index: FuncIndex,
) -> WasmResult<ir::Value> {
let func_index = pos.ins().iconst(ir::types::I32, func_index as i64);
let func_index = pos.ins().iconst(ir::types::I32, func_index.index() as i64);
Ok(self
.instance_call(&mut pos, &FN_REF_FUNC, &[func_index])
.unwrap())


@ -1 +1 @@
{"files":{"Cargo.toml":"7c01a301a32e60cd9b0edd66f4cf8700e5de1d31607437ea756d4f8b0ae29a54","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"ccb5b5b4cb1861f036835c2ac56aefe2a477c8224fb6b681876461230aab67e5","src/isa/x86/instructions.rs":"1aee81c8bc0215fa1cad83e97a0915b24521ae61d503cd727a2406a25dd60f29","src/isa/x86/legalize.rs":"0809d49dbc49d35f33e027890265179ebfda3c55ed252040f8c4ff35d6ee7b02","src/isa/x86/mod.rs":"2b84474c2b0e272c1ebe32530c57f6b11133127c286c8f82c5ae5b6486386238","src/isa/x86/opcodes.rs":"ed8a0e536e290b2930a88816944692c4baa043684c00eafa51da183cdbb59f7d","src/isa/x86/recipes.rs":"c63469f430e457554acf1534f6fe8f37b41984d38d272e023aa0d93b778dc993","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"2d3e09ca34638e19621aef2492ca6943b105e6add830bd91bddbdc85277cb680","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"06dae423ead6c1934fcf5813ddbd4f9983a4957e8ac9a17c88b014903bf71f41","src/shared/legalize.rs":"e8fd35104c1907c0e9453fb98372373aea20b54af10457156f6abd86929099dc","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"0b4f903de5f2df19304c44bf4bd456c3a8e165103b38ccb13b6f88ae8a3c7ee8","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}
{"files":{"Cargo.toml":"7c01a301a32e60cd9b0edd66f4cf8700e5de1d31607437ea756d4f8b0ae29a54","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"b123f056d0d458396679c5f7f2a16d2762af0258fcda4ac14b6655a95e5a0022","src/cdsl/ast.rs":"84a4b7e3301e3249716958a7aa4ea5ba8c6172e3c02f57ee3880504c4433ff19","src/cdsl/cpu_modes.rs":"996e45b374cfe85ac47c8c86c4459fe4c04b3158102b4c63b6ee434d5eed6a9e","src/cdsl/encodings.rs":"d884a564815a03c23369bcf31d13b122ae5ba84d0c80eda9312f0c0a829bf794","src/cdsl/formats.rs":"63e638305aa3ca6dd409ddf0e5e9605eeac1cc2631103e42fc6cbc87703d9b63","src/cdsl/instructions.rs":"41e1a230501de3f0da3960d8aa375c8bcd60ec62ede94ad61806816acbd8009a","src/cdsl/isa.rs":"ccabd6848b69eb069c10db61c7e7f86080777495714bb53d03e663c40541be94","src/cdsl/mod.rs":"0aa827923bf4c45e5ee2359573bd863e00f474acd532739f49dcd74a27553882","src/cdsl/operands.rs":"1c3411504de9c83112ff48e0ff1cfbb2e4ba5a9a15c1716f411ef31a4df59899","src/cdsl/recipes.rs":"80b7cd87332229b569e38086ceee8d557e679b9a32ad2e50bdb15c33337c3418","src/cdsl/regs.rs":"466a42a43355fc7623fe5d8e8d330622207a3af6a80cb9367bc0f06e224c9ee0","src/cdsl/settings.rs":"e6fd9a31925743b93b11f09c9c8271bab6aa2430aa053a2601957b4487df7d77","src/cdsl/type_inference.rs":"1efca8a095ffc899b7527bda6b9d9378c73d7283f8dceaa4819e8af599f8be21","src/cdsl/types.rs":"ff764c9e9c29a05677bff6164e7bc25a0c32655052d77ae580536abba8b1713b","src/cdsl/typevar.rs":"371ac795added2cb464371443313eb55350c629c62ce8e62e192129b6c41d45e","src/cdsl/xform.rs":"55da0c3f2403147b535ab6ae5d69c623fbe839edecf2a3af1de84420cd58402d","src/default_map.rs":"101bb0282a124f9c921f6bd095f529e8753621450d783c3273b0b0394c2c5c03","src/error.rs":"e9b11b2feb2d867b94c8810fdc5a6c4e0d9131604a0bfa5340ff2639a55100b4","src/gen_binemit.rs":"515e243420b30d1e01f8ea630282d9b6d78a715e1951f3f20392e19a48164442","src/gen_encodings.rs":"f00cded6b68a9b48c9e3cd39a8b6f0ba136f4062c8f8666109158a72c62c3ed1","src/gen_inst.rs":"88532d2e2c9724dde968d6b046927249c33d2037ab3e3fd1bd7ebfa77fe12bc7","src/gen_legalizer.rs":"ea229ab9393cc5ba2242f626e74c624ea59314535e74b26602dafb8e96481a72","src/gen_registers.rs":"a904119ed803c9de24dedd15149a65337ffc168bb1d63df53d7fdebfb5f4b158","src/gen_settings.rs":"f3cc3d31f6cc898f30606caf084f0de220db2d3b1b5e5e4145fa7c9a9a1597e2","src/gen_types.rs":"f6c090e1646a43bf2fe81ae0a7029cc6f7dc6d43285368f56d86c35a21c469a6","src/isa/arm32/mod.rs":"da18cb40c1a0a6b613ddefcc38a5d01d02c95de6f233ebd4ad84fefb992c008b","src/isa/arm64/mod.rs":"3a815eaa478d82b7f8b536b83f9debb6b79ec860f99fea6485f209a836c6939a","src/isa/mod.rs":"136141f99f217ba42b9e3f7f47238ab19cc974bb3bef2e2df7f7b5a683989d46","src/isa/riscv/encodings.rs":"8abb1968d917588bc5fc5f5be6dd66bdec23ac456ba65f8138237c8e891e843c","src/isa/riscv/mod.rs":"a7b461a30bbfbc1e3b33645422ff40d5b1761c30cb5d4a8aa12e9a3b7f7aee51","src/isa/riscv/recipes.rs":"5be3bf7c9ba3c51ece384b7eee75a8f7fa0cbacc6a5babc9d0e1d92a2e54a4c2","src/isa/x86/encodings.rs":"2b3c5105e32bce932d2628963cc5c853207e37204a6aec38caace60e52870bbe","src/isa/x86/instructions.rs":"1aee81c8bc0215fa1cad83e97a0915b24521ae61d503cd727a2406a25dd60f29","src/isa/x86/legalize.rs":"ddc834ae8f4a06ca8e3fccf7aef6a097163a2f8d258a7cbc3cc6a8b93c9c0413","src/isa/x86/mod.rs":"2b84474c2b0e272c1ebe32530c57f6b11133127c286c8f82c5ae5b6486386238","src/isa/x86/opcodes.rs":"79d42b71f78119f4ca1dc4fc90bc9efb04c6fc526e01cbe79368aa59f117266a","src/isa/x86/recipes.rs":"c63469f430e457554acf1534f6fe8f37b41984d38d272e023aa0d93b778dc993","src/isa/x86/registers.rs":"4be0a45d8acd465c31746b7976124025b06b453e3f6d587f93efb5af0e1
2b1a8","src/isa/x86/settings.rs":"2d3e09ca34638e19621aef2492ca6943b105e6add830bd91bddbdc85277cb680","src/lib.rs":"2491b0e74078914cb89d1778fa8174daf723fe76aaf7fed18741237d68f6df32","src/shared/entities.rs":"90f774a70e1c2a2e9a553c07a5e80e0fe54cf127434bd83e67274bba4e1a19ba","src/shared/formats.rs":"2f8cbb008778a49b60efac4647dffef654d225823e03ca6272af2678666dc423","src/shared/immediates.rs":"e4a57657f6af9853794804eb41c01204a2c13a632f44f55d90e156a4b98c5f65","src/shared/instructions.rs":"5ffa26a91b344fb7014a34e0d97b4df90d604a5bd49a49a75c262591deb8e6c4","src/shared/legalize.rs":"e8fd35104c1907c0e9453fb98372373aea20b54af10457156f6abd86929099dc","src/shared/mod.rs":"c219625990bf15507ac1077b349ce20e5312d4e4707426183676d469e78792b7","src/shared/settings.rs":"7800f51d97a95d572310f6c80ded59c1c84cf3ba06f9425f4205f88ac46b4e98","src/shared/types.rs":"4702df132f4b5d70cc9411ec5221ba0b1bd4479252274e0223ae57b6d0331247","src/srcgen.rs":"dcfc159c8599270f17e6a978c4be255abca51556b5ef0da497faec4a4a1e62ce","src/unique_table.rs":"31aa54330ca4786af772d32e8cb6158b6504b88fa93fe177bf0c6cbe545a8d35"},"package":null}


@ -231,6 +231,32 @@ impl PerCpuModeEncodings {
});
}
/// Add encodings for `inst.r32` to X86_32.
/// Add encodings for `inst.r32` to X86_64 with and without REX.
/// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
fn enc_r32_r64_instp(
&mut self,
inst: &Instruction,
template: Template,
instp: InstructionPredicateNode,
) {
self.enc32_func(inst.bind(R32), template.nonrex(), |builder| {
builder.inst_predicate(instp.clone())
});
// REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
// reg-alloc would never use r8 and up.
self.enc64_func(inst.bind(R32), template.rex(), |builder| {
builder.inst_predicate(instp.clone())
});
self.enc64_func(inst.bind(R32), template.nonrex(), |builder| {
builder.inst_predicate(instp.clone())
});
self.enc64_func(inst.bind(R64), template.rex().w(), |builder| {
builder.inst_predicate(instp)
});
}
/// Add encodings for `inst.r32` to X86_32.
/// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
@ -810,6 +836,11 @@ fn define_memory(
recipe.opcodes(&MOV_LOAD),
is_load_complex_length_two.clone(),
);
e.enc_r32_r64_instp(
load_complex,
recipe.opcodes(&MOV_LOAD),
is_load_complex_length_two.clone(),
);
e.enc_x86_64_instp(
uload32_complex,
recipe.opcodes(&MOV_LOAD),
@ -855,6 +886,11 @@ fn define_memory(
recipe.opcodes(&MOV_STORE),
is_store_complex_length_three.clone(),
);
e.enc_r32_r64_instp(
store_complex,
recipe.opcodes(&MOV_STORE),
is_store_complex_length_three.clone(),
);
e.enc_x86_64_instp(
istore32_complex,
recipe.opcodes(&MOV_STORE),
@ -948,6 +984,10 @@ fn define_memory(
e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0);
e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0);
}
for &ty in &[R64, R32] {
e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0);
e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0);
}
// Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
@ -1355,6 +1395,7 @@ fn define_alu(
let rotr = shared.by_name("rotr");
let rotr_imm = shared.by_name("rotr_imm");
let selectif = shared.by_name("selectif");
let selectif_spectre_guard = shared.by_name("selectif_spectre_guard");
let sshr = shared.by_name("sshr");
let sshr_imm = shared.by_name("sshr_imm");
let trueff = shared.by_name("trueff");
@ -1568,6 +1609,11 @@ fn define_alu(
// Conditional move (a.k.a integer select).
e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW));
// A Spectre-guard integer select is exactly the same as a selectif, but
// is not associated with any other legalization rules and is not
// recognized by any optimizations, so it must arrive here unmodified
// and in its original place.
e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW));
}
#[inline(never)]
@ -1596,10 +1642,9 @@ fn define_simd(
let fdiv = shared.by_name("fdiv");
let fill = shared.by_name("fill");
let fill_nop = shared.by_name("fill_nop");
let fmax = shared.by_name("fmax");
let fmin = shared.by_name("fmin");
let fmul = shared.by_name("fmul");
let fsub = shared.by_name("fsub");
let iabs = shared.by_name("iabs");
let iadd = shared.by_name("iadd");
let icmp = shared.by_name("icmp");
let imul = shared.by_name("imul");
@ -1635,7 +1680,10 @@ fn define_simd(
let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst");
let vselect = shared.by_name("vselect");
let x86_cvtt2si = x86.by_name("x86_cvtt2si");
let x86_insertps = x86.by_name("x86_insertps");
let x86_fmax = x86.by_name("x86_fmax");
let x86_fmin = x86.by_name("x86_fmin");
let x86_movlhps = x86.by_name("x86_movlhps");
let x86_movsd = x86.by_name("x86_movsd");
let x86_packss = x86.by_name("x86_packss");
@ -1902,6 +1950,13 @@ fn define_simd(
rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS),
Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F
);
e.enc_both_inferred(
x86_cvtt2si
.bind(vector(I32, sse_vector_size))
.bind(vector(F32, sse_vector_size)),
rec_furm.opcodes(&CVTTPS2DQ),
);
}
// SIMD vconst for special cases (all zeroes, all ones)
@ -2136,6 +2191,12 @@ fn define_simd(
e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes));
}
// SIMD integer absolute value.
for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] {
let iabs = iabs.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd));
}
// SIMD logical operations
let band = shared.by_name("band");
let band_not = shared.by_name("band_not");
@ -2268,10 +2329,10 @@ fn define_simd(
(F64, fmul, &MULPD[..]),
(F32, fdiv, &DIVPS[..]),
(F64, fdiv, &DIVPD[..]),
(F32, fmin, &MINPS[..]),
(F64, fmin, &MINPD[..]),
(F32, fmax, &MAXPS[..]),
(F64, fmax, &MAXPD[..]),
(F32, x86_fmin, &MINPS[..]),
(F64, x86_fmin, &MINPD[..]),
(F32, x86_fmax, &MAXPS[..]),
(F64, x86_fmax, &MAXPD[..]),
] {
let inst = inst.bind(vector(*ty, sse_vector_size));
e.enc_both_inferred(inst, rec_fa.opcodes(opcodes));


@ -379,9 +379,12 @@ fn define_simd(
let bnot = insts.by_name("bnot");
let bxor = insts.by_name("bxor");
let extractlane = insts.by_name("extractlane");
let fabs = insts.by_name("fabs");
let fcmp = insts.by_name("fcmp");
let fcvt_from_uint = insts.by_name("fcvt_from_uint");
let fabs = insts.by_name("fabs");
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
let fmax = insts.by_name("fmax");
let fmin = insts.by_name("fmin");
let fneg = insts.by_name("fneg");
let iadd_imm = insts.by_name("iadd_imm");
let icmp = insts.by_name("icmp");
@ -788,6 +791,9 @@ fn define_simd(
narrow.custom_legalize(ineg, "convert_ineg");
narrow.custom_legalize(ushr, "convert_ushr");
narrow.custom_legalize(ishl, "convert_ishl");
narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
narrow.custom_legalize(fmin, "expand_minmax_vector");
narrow.custom_legalize(fmax, "expand_minmax_vector");
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");


@ -103,6 +103,10 @@ pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a];
/// float-point value.
pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a];
/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed
/// doubleword values in xmm1 using truncation (SSE2).
pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b];
/// Convert with truncation scalar double-precision floating-point value to signed
/// integer.
pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c];
@ -299,6 +303,17 @@ pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE).
pub static ORPS: [u8; 2] = [0x0f, 0x56];
/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3).
pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c];
/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in
/// xmm1 (SSSE3).
pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in
/// xmm1 (SSSE3).
pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
/// Converts 8 packed signed word integers from xmm1 and from xxm2/m128 into 16 packed signed byte
/// integers in xmm1 using signed saturation (SSE2).
pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];


@ -211,7 +211,7 @@ fn define_control_flow(
let iAddr = &TypeVar::new(
"iAddr",
"An integer address type",
TypeSetBuilder::new().ints(32..64).build(),
TypeSetBuilder::new().ints(32..64).refs(32..64).build(),
);
{
@ -549,9 +549,9 @@ fn define_simd_lane_access(
r#"
Vector swizzle.
Returns a new vector with byte-width lanes selected from the lanes of the first input
vector ``x`` specified in the second input vector ``s``. The indices ``i`` in range
``[0, 15]`` select the ``i``-th element of ``x``. For indices outside of the range the
Returns a new vector with byte-width lanes selected from the lanes of the first input
vector ``x`` specified in the second input vector ``s``. The indices ``i`` in range
``[0, 15]`` select the ``i``-th element of ``x``. For indices outside of the range the
resulting lane is 0. Note that this operates on byte-width lanes.
"#,
&formats.binary,
@ -744,7 +744,7 @@ pub(crate) fn define(
let iAddr = &TypeVar::new(
"iAddr",
"An integer address type",
TypeSetBuilder::new().ints(32..64).build(),
TypeSetBuilder::new().ints(32..64).refs(32..64).build(),
);
let Ref = &TypeVar::new(
@ -1176,7 +1176,7 @@ pub(crate) fn define(
Inst::new(
"uload8x8",
r#"
Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i16x8
Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i16x8
vector.
"#,
&formats.load,
@ -1190,7 +1190,7 @@ pub(crate) fn define(
Inst::new(
"uload8x8_complex",
r#"
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i16x8 vector.
"#,
&formats.load_complex,
@ -1204,7 +1204,7 @@ pub(crate) fn define(
Inst::new(
"sload8x8",
r#"
Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i16x8
Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i16x8
vector.
"#,
&formats.load,
@ -1218,7 +1218,7 @@ pub(crate) fn define(
Inst::new(
"sload8x8_complex",
r#"
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i16x8 vector.
"#,
&formats.load_complex,
@ -1243,7 +1243,7 @@ pub(crate) fn define(
Inst::new(
"uload16x4",
r#"
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
vector.
"#,
&formats.load,
@ -1257,7 +1257,7 @@ pub(crate) fn define(
Inst::new(
"uload16x4_complex",
r#"
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i32x4 vector.
"#,
&formats.load_complex,
@ -1271,7 +1271,7 @@ pub(crate) fn define(
Inst::new(
"sload16x4",
r#"
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i32x4
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i32x4
vector.
"#,
&formats.load,
@ -1285,7 +1285,7 @@ pub(crate) fn define(
Inst::new(
"sload16x4_complex",
r#"
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i32x4 vector.
"#,
&formats.load_complex,
@ -1310,7 +1310,7 @@ pub(crate) fn define(
Inst::new(
"uload32x2",
r#"
Load an 32x2 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i64x2
Load an 32x2 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i64x2
vector.
"#,
&formats.load,
@ -1324,7 +1324,7 @@ pub(crate) fn define(
Inst::new(
"uload32x2_complex",
r#"
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i64x2 vector.
"#,
&formats.load_complex,
@ -1338,7 +1338,7 @@ pub(crate) fn define(
Inst::new(
"sload32x2",
r#"
Load a 32x2 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i64x2
Load a 32x2 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i64x2
vector.
"#,
&formats.load,
@ -1352,7 +1352,7 @@ pub(crate) fn define(
Inst::new(
"sload32x2_complex",
r#"
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i64x2 vector.
"#,
&formats.load_complex,
@ -1748,6 +1748,34 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"selectif_spectre_guard",
r#"
Conditional select intended for Spectre guards.
This operation is semantically equivalent to a selectif instruction.
However, it is guaranteed to not be removed or otherwise altered by any
optimization pass, and is guaranteed to result in a conditional-move
instruction, not a branch-based lowering. As such, it is suitable
for use when producing Spectre guards. For example, a bounds-check
may guard against unsafe speculation past a bounds-check conditional
branch by passing the address or index to be accessed through a
conditional move, also gated on the same condition. Because no
Spectre-vulnerable processors are known to perform speculation on
conditional move instructions, this is guaranteed to pick the
correct input. If the selected input in case of overflow is a "safe"
value, for example a null pointer that causes an exception in the
speculative path, this ensures that no Spectre vulnerability will
exist.
"#,
&formats.int_select,
)
.operands_in(vec![cc, flags, x, y])
.operands_out(vec![a])
.other_side_effects(true),
);
let c = &Operand::new("c", Any).with_doc("Controlling value to test");
ig.push(
Inst::new(
@ -2347,6 +2375,18 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"iabs",
r#"
Integer absolute value with wrapping: `a := |x|`.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"imul",


@ -264,5 +264,23 @@ pub(crate) fn define() -> SettingGroup {
true,
);
// Spectre options.
settings.add_bool(
"enable_heap_access_spectre_mitigation",
r#"
Enable Spectre mitigation on heap bounds checks.
This is a no-op for any heap that needs no bounds checks; e.g.,
if the limit is static and the guard region is large enough that
the index cannot reach past it.
This option is enabled by default because it is highly
recommended for secure sandboxing. The embedder should consider
the security implications carefully before disabling this option.
"#,
true,
);
settings.build()
}
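
The `enable_heap_access_spectre_mitigation` flag added above is a boolean
setting that defaults to `true`. As a minimal sketch (assuming the usual
`settings`/`Configurable` builder API of this crate), an embedder that has
weighed the security implications could turn it off like this:

```rust
use cranelift_codegen::settings::{self, Configurable};

// Build shared flags with the heap-access Spectre mitigation disabled.
// The mitigation is on by default; leaving it enabled is the recommended
// configuration for sandboxed Wasm embeddings.
fn build_flags() -> settings::Flags {
    let mut builder = settings::builder();
    builder
        .set("enable_heap_access_spectre_mitigation", "false")
        .expect("setting should exist in this Cranelift revision");
    settings::Flags::new(builder)
}
```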

File diff suppressed because one or more lines are too long


@ -302,7 +302,11 @@ fn fallthroughs(func: &mut Function) {
Opcode::Fallthrough => {
// Somebody used a fall-through instruction before the branch relaxation pass.
// Make sure it is correct, i.e. the destination is the layout successor.
debug_assert_eq!(destination, succ, "Illegal fall-through in {}", block)
debug_assert_eq!(
destination, succ,
"Illegal fallthrough from {} to {}, but {}'s successor is {}",
block, destination, block, succ
)
}
Opcode::Jump => {
// If this is a jump to the successor block, change it to a fall-through.


@ -103,7 +103,7 @@ impl Stackmap {
// Refer to the doc comment for `Stackmap` above to understand the
// bitmap representation used here.
let map_size = (dbg!(info.frame_size) + dbg!(info.inbound_args_size)) as usize;
let map_size = (info.frame_size + info.inbound_args_size) as usize;
let word_size = isa.pointer_bytes() as usize;
let num_words = map_size / word_size;


@ -383,7 +383,7 @@ pub struct ExtFuncData {
/// flag is best used when the target is known to be in the same unit of code generation, such
/// as a Wasm module.
///
/// See the documentation for [`RelocDistance`](machinst::RelocDistance) for more details. A
/// See the documentation for [`RelocDistance`](crate::machinst::RelocDistance) for more details. A
/// `colocated` flag value of `true` implies `RelocDistance::Near`.
pub colocated: bool,
}


@ -66,7 +66,7 @@ pub enum GlobalValueData {
///
/// If `true`, some backends may use relocation forms that have limited range: for example,
/// a +/- 2^27-byte range on AArch64. See the documentation for
/// [`RelocDistance`](machinst::RelocDistance) for more details.
/// [`RelocDistance`](crate::machinst::RelocDistance) for more details.
colocated: bool,
/// Does this symbol refer to a thread local storage value?


@ -41,7 +41,7 @@ impl ValueLoc {
pub fn unwrap_reg(self) -> RegUnit {
match self {
Self::Reg(ru) => ru,
_ => panic!("Expected register: {:?}", self),
_ => panic!("unwrap_reg expected register, found {:?}", self),
}
}
@ -49,7 +49,7 @@ impl ValueLoc {
pub fn unwrap_stack(self) -> StackSlot {
match self {
Self::Stack(ss) => ss,
_ => panic!("Expected stack slot: {:?}", self),
_ => panic!("unwrap_stack expected stack slot, found {:?}", self),
}
}


@ -86,9 +86,9 @@
//! formal arguments, would:
//! - Accept a pointer P to the struct return area in x0 on entry.
//! - Return v3 in x0.
//! - Return v2 in memory at [P].
//! - Return v1 in memory at [P+8].
//! - Return v0 in memory at [P+16].
//! - Return v2 in memory at `[P]`.
//! - Return v1 in memory at `[P+8]`.
//! - Return v0 in memory at `[P+16]`.
use crate::ir;
use crate::ir::types;


@ -361,6 +361,20 @@ fn enc_vec_rr_misc(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
bits | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
}
fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
debug_assert_eq!(q & 0b1, q);
debug_assert_eq!(u & 0b1, u);
debug_assert_eq!(size & 0b11, size);
debug_assert_eq!(opcode & 0b11111, opcode);
0b0_0_0_01110_00_11000_0_0000_10_00000_00000
| q << 30
| u << 29
| size << 22
| opcode << 12
| machreg_to_vec(rn) << 5
| machreg_to_vec(rd.to_reg())
}
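
As a cross-check, the `uminv b2, v1.16b` test vector added further down
(expected bytes `22A8316E`) can be reproduced by hand from `enc_vec_lanes`:
with `q=1`, `u=1`, `size=0b00`, `opcode=0b11010`, `rn=1`, `rd=2` the fields
assemble to the word `0x6E31A822`, emitted little-endian. A small standalone
recomputation (constant copied from the function above; register-number
extraction is inlined instead of going through `machreg_to_vec`):

```rust
// Standalone recomputation of the UMINV encoding produced by enc_vec_lanes.
fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: u32, rn: u32) -> u32 {
    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
        | q << 30
        | u << 29
        | size << 22
        | opcode << 12
        | rn << 5
        | rd
}

fn main() {
    // uminv b2, v1.16b: Q=1, U=1, size=00 (bytes), opcode=0b11010, Vn=1, Vd=2.
    let word = enc_vec_lanes(1, 1, 0b00, 0b11010, 2, 1);
    assert_eq!(word, 0x6E31A822);
    // Emitted as little-endian bytes, this matches the "22A8316E" expectation.
    assert_eq!(word.to_le_bytes(), [0x22, 0xA8, 0x31, 0x6E]);
}
```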
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
@ -1061,6 +1075,18 @@ impl MachInstEmit for Inst {
};
sink.put4(enc_vec_rr_misc(bits_12_16, rd, rn));
}
&Inst::VecLanes { op, rd, rn, ty } => {
let (q, size) = match ty {
I8X16 => (0b1, 0b00),
I16X8 => (0b1, 0b01),
I32X4 => (0b1, 0b10),
_ => unreachable!(),
};
let (u, opcode) = match op {
VecLanesOp::Uminv => (0b1, 0b11010),
};
sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
}
&Inst::FpuCmp32 { rn, rm } => {
sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
}
@ -1247,12 +1273,17 @@ impl MachInstEmit for Inst {
alu_op,
ty,
} => {
let enc_size_for_cmp = match ty {
let enc_size = match ty {
I8X16 => 0b00,
I16X8 => 0b01,
I32X4 => 0b10,
_ => 0,
};
let enc_size_for_fcmp = match ty {
F32X4 => 0b0,
F64X2 => 0b1,
_ => 0,
};
let (top11, bit15_10) = match alu_op {
VecALUOp::SQAddScalar => {
@ -1271,12 +1302,15 @@ impl MachInstEmit for Inst {
debug_assert_eq!(I64, ty);
(0b011_11110_11_1, 0b001011)
}
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b100011),
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
// The following instructions operate on bytes, so are not encoded differently
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size << 1, 0b001101),
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size << 1, 0b001111),
VecALUOp::Fcmeq => (0b010_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
VecALUOp::Fcmgt => (0b011_01110_10_1 | enc_size_for_fcmp << 1, 0b111001),
VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
// The following logical instructions operate on bytes, so are not encoded differently
// for the different vector types.
VecALUOp::And => {
debug_assert_eq!(128, ty_bits(ty));
@ -1298,6 +1332,7 @@ impl MachInstEmit for Inst {
debug_assert_eq!(128, ty_bits(ty));
(0b011_01110_01_1, 0b000111)
}
VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001),
};
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
}

View file

@ -2209,6 +2209,42 @@ fn test_aarch64_binemit() {
"cmhs v8.4s, v2.4s, v15.4s",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fcmeq,
rd: writable_vreg(28),
rn: vreg(12),
rm: vreg(4),
ty: F32X4,
},
"9CE5244E",
"fcmeq v28.4s, v12.4s, v4.4s",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fcmgt,
rd: writable_vreg(3),
rn: vreg(16),
rm: vreg(31),
ty: F64X2,
},
"03E6FF6E",
"fcmgt v3.2d, v16.2d, v31.2d",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fcmge,
rd: writable_vreg(18),
rn: vreg(23),
rm: vreg(0),
ty: F64X2,
},
"F2E6606E",
"fcmge v18.2d, v23.2d, v0.2d",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::And,
@ -2269,6 +2305,42 @@ fn test_aarch64_binemit() {
"bsl v8.16b, v9.16b, v1.16b",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Umaxp,
rd: writable_vreg(8),
rn: vreg(12),
rm: vreg(1),
ty: I8X16,
},
"88A5216E",
"umaxp v8.16b, v12.16b, v1.16b",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Umaxp,
rd: writable_vreg(1),
rn: vreg(6),
rm: vreg(1),
ty: I16X8,
},
"C1A4616E",
"umaxp v1.8h, v6.8h, v1.8h",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Umaxp,
rd: writable_vreg(1),
rn: vreg(20),
rm: vreg(16),
ty: I32X4,
},
"81A6B06E",
"umaxp v1.4s, v20.4s, v16.4s",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Not,
@ -2280,6 +2352,39 @@ fn test_aarch64_binemit() {
"mvn v2.16b, v1.16b",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,
rd: writable_vreg(2),
rn: vreg(1),
ty: I8X16,
},
"22A8316E",
"uminv b2, v1.16b",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,
rd: writable_vreg(3),
rn: vreg(11),
ty: I16X8,
},
"63A9716E",
"uminv h3, v11.8h",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,
rd: writable_vreg(18),
rn: vreg(4),
ty: I32X4,
},
"92A8B16E",
"uminv s18, v4.4s",
));
insns.push((
Inst::Extend {
rd: writable_xreg(1),


@ -304,6 +304,14 @@ impl Imm12 {
}
}
/// Create a zero immediate of this format.
pub fn zero() -> Self {
Imm12 {
bits: 0,
shift12: false,
}
}
/// Bits for 2-bit "shift" field in e.g. AddI.
pub fn shift_bits(&self) -> u32 {
if self.shift12 {


@ -225,6 +225,12 @@ pub enum VecALUOp {
Cmhs,
/// Compare unsigned higher or same
Cmhi,
/// Floating-point compare equal
Fcmeq,
/// Floating-point compare greater than
Fcmgt,
/// Floating-point compare greater than or equal
Fcmge,
/// Bitwise and
And,
/// Bitwise bit clear
@ -235,6 +241,8 @@ pub enum VecALUOp {
Eor,
/// Bitwise select
Bsl,
/// Unsigned maximum pairwise
Umaxp,
}
/// A Vector miscellaneous operation with two registers.
@ -244,6 +252,13 @@ pub enum VecMisc2 {
Not,
}
/// An operation across the lanes of vectors.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecLanesOp {
/// Unsigned minimum across a vector
Uminv,
}
/// An operation on the bits of a register. This can be paired with several instruction formats
/// below (see `Inst`) in any combination.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
@ -743,6 +758,14 @@ pub enum Inst {
ty: Type,
},
/// Vector instruction across lanes.
VecLanes {
op: VecLanesOp,
rd: Writable<Reg>,
rn: Reg,
ty: Type,
},
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
MovToNZCV {
rn: Reg,
@ -876,7 +899,7 @@ pub enum Inst {
},
/// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
/// controls MemArg::NominalSPOffset args are lowered.
/// controls how MemArg::NominalSPOffset args are lowered.
VirtualSPOffsetAdj {
offset: i64,
},
@ -1214,6 +1237,11 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecLanes { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
collector.add_use(rn);
collector.add_use(rm);
@ -1708,6 +1736,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecLanes {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::FpuCmp32 {
ref mut rn,
ref mut rm,
@ -2055,7 +2091,9 @@ impl MachInst for Inst {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
F32 | F64 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64),
B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 => Ok(RegClass::V128),
B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 | F32X4 | F64X2 => {
Ok(RegClass::V128)
}
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty
@ -2482,7 +2520,7 @@ impl ShowWithRRU for Inst {
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
|reg, mb_rru| show_vreg_vector(reg, mb_rru, F32X2)
} else {
show_vreg_scalar
|reg, mb_rru| show_vreg_scalar(reg, mb_rru, F64)
};
let rd = show_vreg_fn(rd.to_reg(), mb_rru);
let rn = show_vreg_fn(rn, mb_rru);
@ -2690,17 +2728,21 @@ impl ShowWithRRU for Inst {
VecALUOp::Cmgt => ("cmgt", true, ty),
VecALUOp::Cmhs => ("cmhs", true, ty),
VecALUOp::Cmhi => ("cmhi", true, ty),
VecALUOp::Fcmeq => ("fcmeq", true, ty),
VecALUOp::Fcmgt => ("fcmgt", true, ty),
VecALUOp::Fcmge => ("fcmge", true, ty),
VecALUOp::And => ("and", true, I8X16),
VecALUOp::Bic => ("bic", true, I8X16),
VecALUOp::Orr => ("orr", true, I8X16),
VecALUOp::Eor => ("eor", true, I8X16),
VecALUOp::Bsl => ("bsl", true, I8X16),
VecALUOp::Umaxp => ("umaxp", true, ty),
};
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
|reg, mb_rru, ty| show_vreg_vector(reg, mb_rru, ty)
} else {
|reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru)
|reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru, I64)
};
let rd = show_vreg_fn(rd.to_reg(), mb_rru, ty);
@ -2722,6 +2764,15 @@ impl ShowWithRRU for Inst {
let rn = show_vreg_vector(rn, mb_rru, ty);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecLanes { op, rd, rn, ty } => {
let op = match op {
VecLanesOp::Uminv => "uminv",
};
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ty);
let rn = show_vreg_vector(rn, mb_rru, ty);
format!("{} {}, {}", op, rd, rn)
}
&Inst::MovToNZCV { rn } => {
let rn = rn.show_rru(mb_rru);
format!("msr nzcv, {}", rn)


@ -292,7 +292,7 @@ pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSiz
}
/// Show a vector register used in a scalar context.
pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
let mut s = reg.show_rru(mb_rru);
if reg.get_class() != RegClass::V128 {
// We can't do any better.
@ -302,7 +302,14 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
if reg.is_real() {
// Change (eg) "v0" into "d0".
if reg.get_class() == RegClass::V128 && s.starts_with("v") {
s.replace_range(0..1, "d");
let replacement = match ty {
I64 | F64 => "d",
I8X16 => "b",
I16X8 => "h",
I32X4 => "s",
_ => unimplemented!(),
};
s.replace_range(0..1, replacement);
}
} else {
// Add a "d" suffix to RegClass::V128 vregs.


@ -14,7 +14,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;
use crate::{CodegenError, CodegenResult};
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;
@ -168,7 +168,7 @@ pub(crate) fn output_to_const_f128<C: LowerCtx<I = Inst>>(
}
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `input_to_*` below.
/// parameter to `put_input_in_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NarrowValueMode {
None,
@ -193,7 +193,7 @@ impl NarrowValueMode {
}
/// Allocate a register for an instruction output and return it.
pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
pub(crate) fn get_output_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
ctx.get_output(out.insn, out.output)
}
@ -202,12 +202,12 @@ pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput)
/// The given register will be extended appropriately, according to
/// `narrow_mode` and the input's type. If extended, the value is
/// always extended to 64 bits, for simplicity.
pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> Reg {
debug!("input_to_reg: input {:?}", input);
debug!("put_input_in_reg: input {:?}", input);
let ty = ctx.input_ty(input.insn, input.input);
let from_bits = ty_bits(ty) as u8;
let inputs = ctx.get_input(input.insn, input.input);
@ -302,7 +302,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
/// register will be provided the extended value.
fn input_to_rs<C: LowerCtx<I = Inst>>(
fn put_input_in_rs<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
@ -317,21 +317,21 @@ fn input_to_rs<C: LowerCtx<I = Inst>>(
// Can we get the shift amount as an immediate?
if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
let reg = input_to_reg(ctx, shiftee, narrow_mode);
let reg = put_input_in_reg(ctx, shiftee, narrow_mode);
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
}
}
}
ResultRS::Reg(input_to_reg(ctx, input, narrow_mode))
ResultRS::Reg(put_input_in_reg(ctx, input, narrow_mode))
}
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
/// This does not actually codegen the source instruction; it just uses the
/// vreg into which the source instruction will generate its value.
///
/// See note on `input_to_rs` for a description of `narrow_mode`.
fn input_to_rse<C: LowerCtx<I = Inst>>(
/// See note on `put_input_in_rs` for a description of `narrow_mode`.
fn put_input_in_rse<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
@ -349,7 +349,7 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
&& ((narrow_mode.is_32bit() && out_bits < 32)
|| (!narrow_mode.is_32bit() && out_bits < 64))
{
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
let extendop = match (narrow_mode, out_bits) {
(NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
ExtendOp::SXTB
@ -394,15 +394,15 @@ fn input_to_rse<C: LowerCtx<I = Inst>>(
(false, 32) => ExtendOp::UXTW,
_ => unreachable!(),
};
let reg = input_to_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
return ResultRSE::RegExtend(reg, extendop);
}
}
ResultRSE::from_rs(input_to_rs(ctx, input, narrow_mode))
ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode))
}
pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
pub(crate) fn put_input_in_rse_imm12<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
@ -413,10 +413,10 @@ pub(crate) fn input_to_rse_imm12<C: LowerCtx<I = Inst>>(
}
}
ResultRSEImm12::from_rse(input_to_rse(ctx, input, narrow_mode))
ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
}
pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
pub(crate) fn put_input_in_rs_immlogic<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
narrow_mode: NarrowValueMode,
@ -429,20 +429,22 @@ pub(crate) fn input_to_rs_immlogic<C: LowerCtx<I = Inst>>(
}
}
ResultRSImmLogic::from_rs(input_to_rs(ctx, input, narrow_mode))
ResultRSImmLogic::from_rs(put_input_in_rs(ctx, input, narrow_mode))
}
pub(crate) fn input_to_reg_immshift<C: LowerCtx<I = Inst>>(
pub(crate) fn put_input_in_reg_immshift<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
shift_width_bits: usize,
) -> ResultRegImmShift {
if let Some(imm_value) = input_to_const(ctx, input) {
let imm_value = imm_value & ((shift_width_bits - 1) as u64);
if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
return ResultRegImmShift::ImmShift(immshift);
}
}
ResultRegImmShift::Reg(input_to_reg(ctx, input, NarrowValueMode::None))
ResultRegImmShift::Reg(put_input_in_reg(ctx, input, NarrowValueMode::None))
}
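
This hunk appears to carry the fix for the fuzzbug in Bug 1649432: constant
shift amounts are now masked to the shifted type's width before being turned
into an `ImmShift`, so an oversized immediate can no longer feed an
out-of-range value into later width-minus-amount arithmetic. A rough
illustration of the masking step, using hypothetical helper names rather
than the real lowering code:

```rust
// Hypothetical illustration of why constant shift amounts are masked to the
// type width: without the mask, an amount like 300 on a 32-bit shift could
// flow into later "width - amount" style arithmetic and underflow.
fn mask_shift_amount(imm_value: u64, shift_width_bits: usize) -> u64 {
    debug_assert!(shift_width_bits.is_power_of_two());
    imm_value & ((shift_width_bits - 1) as u64)
}

fn main() {
    assert_eq!(mask_shift_amount(300, 32), 12); // 300 & 31
    assert_eq!(mask_shift_amount(7, 64), 7);    // small amounts are unchanged
    let amt = mask_shift_amount(300, 32) as u32;
    let _complement = 32 - amt; // now guaranteed not to underflow
}
```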
//============================================================================
@ -546,7 +548,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// Handle one reg and offset.
if addends.len() == 1 {
let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
let reg = put_input_in_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
return MemArg::RegOffset(reg, offset as i64, elem_ty);
}
@ -560,9 +562,9 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
maybe_input_insn_multi(ctx, addends[i], &[Opcode::Uextend, Opcode::Sextend])
{
// Non-extended addend.
let r1 = input_to_reg(ctx, addends[1 - i], NarrowValueMode::ZeroExtend64);
let r1 = put_input_in_reg(ctx, addends[1 - i], NarrowValueMode::ZeroExtend64);
// Extended addend.
let r2 = input_to_reg(
let r2 = put_input_in_reg(
ctx,
InsnInput {
insn: ext_insn,
@ -596,8 +598,8 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// Handle two regs and a zero offset in the general case, if possible.
if addends.len() == 2 && offset == 0 {
let ra = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
let rb = input_to_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
let ra = put_input_in_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
let rb = put_input_in_reg(ctx, addends[1], NarrowValueMode::ZeroExtend64);
return MemArg::reg_plus_reg(ra, rb);
}
@ -609,7 +611,7 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
// Add each addend to the address.
for addend in addends {
let reg = input_to_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);
let reg = put_input_in_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);
// In an addition, the stack register is the zero register, so divert it to another
// register just before doing the actual add.
@ -726,6 +728,77 @@ pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
}
}
pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
ctx: &mut C,
rd: Writable<Reg>,
mut rn: Reg,
mut rm: Reg,
ty: Type,
cond: Cond,
) -> CodegenResult<()> {
match ty {
F32X4 | F64X2 | I8X16 | I16X8 | I32X4 => {}
_ => {
return Err(CodegenError::Unsupported(format!(
"unsupported SIMD type: {:?}",
ty
)));
}
};
let is_float = match ty {
F32X4 | F64X2 => true,
_ => false,
};
// 'Less than' operations are implemented by swapping
// the order of operands and using the 'greater than'
// instructions.
// 'Not equal' is implemented with 'equal' and inverting
// the result.
let (alu_op, swap) = match (is_float, cond) {
(false, Cond::Eq) => (VecALUOp::Cmeq, false),
(false, Cond::Ne) => (VecALUOp::Cmeq, false),
(false, Cond::Ge) => (VecALUOp::Cmge, false),
(false, Cond::Gt) => (VecALUOp::Cmgt, false),
(false, Cond::Le) => (VecALUOp::Cmge, true),
(false, Cond::Lt) => (VecALUOp::Cmgt, true),
(false, Cond::Hs) => (VecALUOp::Cmhs, false),
(false, Cond::Hi) => (VecALUOp::Cmhi, false),
(false, Cond::Ls) => (VecALUOp::Cmhs, true),
(false, Cond::Lo) => (VecALUOp::Cmhi, true),
(true, Cond::Eq) => (VecALUOp::Fcmeq, false),
(true, Cond::Ne) => (VecALUOp::Fcmeq, false),
(true, Cond::Mi) => (VecALUOp::Fcmgt, true),
(true, Cond::Ls) => (VecALUOp::Fcmge, true),
(true, Cond::Ge) => (VecALUOp::Fcmge, false),
(true, Cond::Gt) => (VecALUOp::Fcmgt, false),
_ => unreachable!(),
};
if swap {
std::mem::swap(&mut rn, &mut rm);
}
ctx.emit(Inst::VecRRR {
alu_op,
rd,
rn,
rm,
ty,
});
if cond == Cond::Ne {
ctx.emit(Inst::VecMisc {
op: VecMisc2::Not,
rd,
rn: rd.to_reg(),
ty: I8X16,
});
}
Ok(())
}
/// Determines whether this condcode interprets inputs as signed or
/// unsigned. See the documentation for the `icmp` instruction in
/// cranelift-codegen/meta/src/shared/instructions.rs for further insights
@ -762,6 +835,7 @@ pub fn ty_bits(ty: Type) -> usize {
IFLAGS | FFLAGS => 32,
B8X8 | I8X8 | B16X4 | I16X4 | B32X2 | I32X2 => 64,
B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 => 128,
F32X4 | F64X2 => 128,
_ => panic!("ty_bits() on unknown type: {:?}", ty),
}
}
@ -925,8 +999,8 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
},
];
let ty = ctx.input_ty(insn, 0);
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rd = writable_zero_reg();
@ -946,8 +1020,8 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i
input: 1,
},
];
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
match bits {
32 => {
ctx.emit(Inst::FpuCmp32 { rn, rm });


@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::{CodegenError, CodegenResult};
use crate::CodegenResult;
use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
@ -42,31 +42,31 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match op {
Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
let value = ctx.get_constant(insn).unwrap();
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
lower_constant_u64(ctx, rd, value);
}
Opcode::F32const => {
let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
lower_constant_f32(ctx, rd, value);
}
Opcode::F64const => {
let value = f64::from_bits(ctx.get_constant(insn).unwrap());
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
lower_constant_f64(ctx, rd, value);
}
Opcode::Iadd => {
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
}
Opcode::Isub => {
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
@ -87,9 +87,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
};
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
ctx.emit(Inst::VecRRR {
@ -121,9 +121,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
};
let va = ctx.alloc_tmp(RegClass::V128, I128);
let vb = ctx.alloc_tmp(RegClass::V128, I128);
let ra = input_to_reg(ctx, inputs[0], narrow_mode);
let rb = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
ctx.emit(Inst::VecRRR {
@ -142,18 +142,18 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Ineg => {
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let rn = zero_reg();
let rm = input_to_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
}
Opcode::Imul => {
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
ctx.emit(Inst::AluRRRR {
@ -166,7 +166,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Umulhi | Opcode::Smulhi => {
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let is_signed = op == Opcode::Smulhi;
let input_ty = ctx.input_ty(insn, 0);
assert!(ctx.input_ty(insn, 1) == input_ty);
@ -174,8 +174,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match input_ty {
I64 => {
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ra = zero_reg();
let alu_op = if is_signed {
ALUOp::SMulH
@ -196,8 +196,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
NarrowValueMode::ZeroExtend64
};
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = input_to_reg(ctx, inputs[1], narrow_mode);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
let ra = zero_reg();
ctx.emit(Inst::AluRRRR {
alu_op: ALUOp::MAdd64,
@ -254,9 +254,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ALUOp::UDiv64
};
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
// The div instruction does not trap on divide by zero or signed overflow
// so checks are inserted below.
//
@ -372,8 +372,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// If we reach this point, we weren't able to incorporate the extend as
// a register-mode on another instruction, so we have a 'None'
// narrow-value/extend mode here, and we emit the explicit instruction.
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::Extend {
rd,
rn,
@ -385,15 +385,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Bnot => {
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
let rm = input_to_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
// NOT rd, rm ==> ORR_NOT rd, zero, rm
ctx.emit(alu_inst_immlogic(alu_op, rd, zero_reg(), rm));
} else {
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::VecMisc {
op: VecMisc2::Not,
rd,
@ -409,11 +409,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::BandNot
| Opcode::BorNot
| Opcode::BxorNot => {
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
let alu_op = match op {
Opcode::Band => choose_32_64(ty, ALUOp::And32, ALUOp::And64),
Opcode::Bor => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
@ -433,9 +433,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => unreachable!(),
};
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::VecRRR {
alu_op,
@ -458,9 +458,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32,
_ => unreachable!(),
};
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = input_to_reg_immshift(ctx, inputs[1]);
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
let alu_op = match op {
Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
@ -503,8 +503,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty = ty.unwrap();
let ty_bits_size = ty_bits(ty) as u8;
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(
ctx,
inputs[0],
if ty_bits_size <= 32 {
@ -513,7 +513,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
NarrowValueMode::ZeroExtend64
},
);
let rm = input_to_reg_immshift(ctx, inputs[1]);
let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
if ty_bits_size == 32 || ty_bits_size == 64 {
let alu_op = choose_32_64(ty, ALUOp::RotR32, ALUOp::RotR64);
@ -652,7 +652,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Bitrev | Opcode::Clz | Opcode::Cls | Opcode::Ctz => {
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let needs_zext = match op {
Opcode::Bitrev | Opcode::Ctz => false,
Opcode::Clz | Opcode::Cls => true,
@ -666,7 +666,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
NarrowValueMode::None
};
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let op_ty = match ty {
I8 | I16 | I32 => I32,
I64 => I64,
@ -722,11 +722,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// x += x << 32
// x >> 56
let ty = ty.unwrap();
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
// FIXME(#1537): zero-extend 8/16/32-bit operands only to 32 bits,
// and fix the sequence below to work properly for this.
let narrow_mode = NarrowValueMode::ZeroExtend64;
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
// If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
@ -903,7 +903,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let is_float = ty_is_float(elem_ty);
let mem = lower_address(ctx, elem_ty, &inputs[..], off);
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let memflags = ctx.memflags(insn).expect("memory flags");
let srcloc = if !memflags.notrap() {
@ -967,7 +967,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let is_float = ty_is_float(elem_ty);
let mem = lower_address(ctx, elem_ty, &inputs[1..], off);
let rd = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let memflags = ctx.memflags(insn).expect("memory flags");
let srcloc = if !memflags.notrap() {
@ -997,7 +997,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} => (stack_slot, offset),
_ => unreachable!(),
};
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let offset: i32 = offset.into();
let inst = ctx
.abi()
@ -1023,7 +1023,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Nothing.
}
Opcode::Select | Opcode::Selectif => {
Opcode::Select | Opcode::Selectif | Opcode::SelectifSpectreGuard => {
let cond = if op == Opcode::Select {
let (cmp_op, narrow_mode) = if ty_bits(ctx.input_ty(insn, 0)) > 32 {
(ALUOp::SubS64, NarrowValueMode::ZeroExtend64)
@ -1031,7 +1031,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(ALUOp::SubS32, NarrowValueMode::ZeroExtend32)
};
let rcond = input_to_reg(ctx, inputs[0], narrow_mode);
let rcond = put_input_in_reg(ctx, inputs[0], narrow_mode);
// cmp rcond, #0
ctx.emit(Inst::AluRRR {
alu_op: cmp_op,
@ -1052,9 +1052,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
};
// csel.COND rd, rn, rm
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
let ty = ctx.output_ty(insn, 0);
let bits = ty_bits(ty);
if ty_is_float(ty) && bits == 32 {
@ -1070,10 +1070,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let rd = output_to_reg(ctx, outputs[0]);
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
// AND rTmp, rn, rcond
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::And64,
@ -1096,10 +1096,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rm: tmp.to_reg(),
});
} else {
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::gen_move(rd, rcond, ty));
ctx.emit(Inst::VecRRR {
@ -1120,7 +1120,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// single-def ifcmp.
let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap();
lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed);
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::CSet { rd, cond });
}
@ -1129,7 +1129,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let cond = lower_fp_condcode(condcode);
let ffcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ffcmp).unwrap();
lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::CSet { rd, cond });
}
@ -1138,8 +1138,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Copy => {
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ctx.input_ty(insn, 0);
ctx.emit(Inst::gen_move(rd, rn, ty));
}
@ -1157,16 +1157,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// - Ireduce: changing width of an integer. Smaller ints are stored
// with undefined high-order bits, so we can simply do a copy.
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ctx.input_ty(insn, 0);
ctx.emit(Inst::gen_move(rd, rn, ty));
}
Opcode::Bmask => {
// Bool is {0, 1}, so we can subtract from 0 to get all-1s.
let rd = output_to_reg(ctx, outputs[0]);
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let rd = get_output_reg(ctx, outputs[0]);
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Sub64,
rd,
@ -1176,7 +1176,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Bitcast => {
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let ity = ctx.input_ty(insn, 0);
let oty = ctx.output_ty(insn, 0);
match (ty_is_float(ity), ty_is_float(oty)) {
@ -1186,19 +1186,19 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
NarrowValueMode::ZeroExtend64
};
let rm = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = put_input_in_reg(ctx, inputs[0], narrow_mode);
ctx.emit(Inst::gen_move(rd, rm, oty));
}
(false, false) => {
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::gen_move(rd, rm, oty));
}
(false, true) => {
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
ctx.emit(Inst::MovToVec64 { rd, rn });
}
(true, false) => {
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::MovFromVec {
rd,
rn,
@ -1214,7 +1214,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// N.B.: according to the AArch64 ABI, the top bits of a register
// (above the bits for the value's type) are undefined, so we
// need not extend the return values.
let reg = input_to_reg(ctx, *input, NarrowValueMode::None);
let reg = put_input_in_reg(ctx, *input, NarrowValueMode::None);
let retval_reg = ctx.retval(i);
let ty = ctx.input_ty(insn, i);
ctx.emit(Inst::gen_move(retval_reg, reg, ty));
@ -1234,6 +1234,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let condcode = inst_condcode(ctx.data(insn)).unwrap();
let cond = lower_condcode(condcode);
let is_signed = condcode_is_signed(condcode);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ctx.input_ty(insn, 0);
let bits = ty_bits(ty);
let narrow_mode = match (bits <= 32, is_signed) {
@ -1242,68 +1243,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(false, true) => NarrowValueMode::SignExtend64,
(false, false) => NarrowValueMode::ZeroExtend64,
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
if ty_bits(ty) < 128 {
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
ctx.emit(Inst::CondSet { cond, rd });
} else {
match ty {
I8X16 | I16X8 | I32X4 => {}
_ => {
return Err(CodegenError::Unsupported(format!(
"unsupported simd type: {:?}",
ty
)));
}
};
let mut rn = input_to_reg(ctx, inputs[0], narrow_mode);
let mut rm = input_to_reg(ctx, inputs[1], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
// 'Less than' operations are implemented by swapping
// the order of operands and using the 'greater than'
// instructions.
// 'Not equal' is implemented with 'equal' and inverting
// the result.
let (alu_op, swap) = match cond {
Cond::Eq => (VecALUOp::Cmeq, false),
Cond::Ne => (VecALUOp::Cmeq, false),
Cond::Ge => (VecALUOp::Cmge, false),
Cond::Gt => (VecALUOp::Cmgt, false),
Cond::Le => (VecALUOp::Cmge, true),
Cond::Lt => (VecALUOp::Cmgt, true),
Cond::Hs => (VecALUOp::Cmhs, false),
Cond::Hi => (VecALUOp::Cmhi, false),
Cond::Ls => (VecALUOp::Cmhs, true),
Cond::Lo => (VecALUOp::Cmhi, true),
_ => unreachable!(),
};
if swap {
std::mem::swap(&mut rn, &mut rm);
}
ctx.emit(Inst::VecRRR {
alu_op,
rd,
rn,
rm,
ty,
});
if cond == Cond::Ne {
ctx.emit(Inst::VecMisc {
op: VecMisc2::Not,
rd,
rn: rd.to_reg(),
ty: I8X16,
});
}
let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
}
}
@ -1311,19 +1260,24 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let condcode = inst_fp_condcode(ctx.data(insn)).unwrap();
let cond = lower_fp_condcode(condcode);
let ty = ctx.input_ty(insn, 0);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
match ty_bits(ty) {
32 => {
ctx.emit(Inst::FpuCmp32 { rn, rm });
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
if ty_bits(ty) < 128 {
match ty_bits(ty) {
32 => {
ctx.emit(Inst::FpuCmp32 { rn, rm });
}
64 => {
ctx.emit(Inst::FpuCmp64 { rn, rm });
}
_ => panic!("Bad float size"),
}
64 => {
ctx.emit(Inst::FpuCmp64 { rn, rm });
}
_ => panic!("Bad float size"),
ctx.emit(Inst::CondSet { cond, rd });
} else {
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
}
ctx.emit(Inst::CondSet { cond, rd });
}
Opcode::JumpTableEntry | Opcode::JumpTableBase => {
@ -1390,7 +1344,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::FuncAddr => {
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let (extname, _) = ctx.call_target(insn).unwrap();
let extname = extname.clone();
let loc = ctx.srcloc(insn);
@ -1407,7 +1361,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::SymbolValue => {
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let (extname, _, offset) = ctx.symbol_value(insn).unwrap();
let extname = extname.clone();
let loc = ctx.srcloc(insn);
@ -1434,7 +1388,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
)
}
Opcode::CallIndirect => {
let ptr = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let ptr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() - 1 == sig.params.len());
assert!(outputs.len() == sig.returns.len());
@ -1446,24 +1400,24 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
abi.emit_stack_pre_adjust(ctx);
assert!(inputs.len() == abi.num_args());
for (i, input) in inputs.iter().enumerate() {
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
let arg_reg = put_input_in_reg(ctx, *input, NarrowValueMode::None);
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
}
abi.emit_call(ctx);
for (i, output) in outputs.iter().enumerate() {
let retval_reg = output_to_reg(ctx, *output);
let retval_reg = get_output_reg(ctx, *output);
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
}
abi.emit_stack_post_adjust(ctx);
}
Opcode::GetPinnedReg => {
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::mov(rd, xreg(PINNED_REG)));
}
Opcode::SetPinnedReg => {
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::mov(writable_xreg(PINNED_REG), rm));
}
@ -1497,13 +1451,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Vconst => {
let value = output_to_const_f128(ctx, outputs[0]).unwrap();
let rd = output_to_reg(ctx, outputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
lower_constant_f128(ctx, rd, value);
}
Opcode::RawBitcast => {
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ctx.input_ty(insn, 0);
ctx.emit(Inst::gen_move(rd, rm, ty));
}
@ -1511,8 +1465,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Extractlane => {
if let InstructionData::BinaryImm8 { imm, .. } = ctx.data(insn) {
let idx = *imm;
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_is_int(ty) {
@ -1529,8 +1483,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Splat => {
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ctx.input_ty(insn, 0);
let inst = if ty_is_int(ty) {
Inst::VecDup { rd, rn, ty }
@ -1540,12 +1494,58 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(inst);
}
Opcode::VanyTrue | Opcode::VallTrue => {
let rd = get_output_reg(ctx, outputs[0]);
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let tmp = ctx.alloc_tmp(RegClass::V128, ty.unwrap());
// This operation is implemented by using umaxp or uminv to
// create a scalar value, which is then compared against zero.
//
// umaxp vn.16b, vm.16b, vm.16b / uminv bn, vm.16b
// mov xm, vn.d[0]
// cmp xm, #0
// cset xm, ne
let input_ty = ctx.input_ty(insn, 0);
if op == Opcode::VanyTrue {
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Umaxp,
rd: tmp,
rn: rm,
rm: rm,
ty: input_ty,
});
} else {
ctx.emit(Inst::VecLanes {
op: VecLanesOp::Uminv,
rd: tmp,
rn: rm,
ty: input_ty,
});
};
ctx.emit(Inst::MovFromVec {
rd,
rn: tmp.to_reg(),
idx: 0,
ty: I64,
});
ctx.emit(Inst::AluRRImm12 {
alu_op: ALUOp::SubS64,
rd: writable_zero_reg(),
rn: rd.to_reg(),
imm12: Imm12::zero(),
});
ctx.emit(Inst::CSet { rd, cond: Cond::Ne });
}
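
The umaxp/uminv sequence above reduces the vector to one scalar, which is then compared against zero. A scalar model of that reduction, purely illustrative and not code from this patch:

// vany_true: some lane is non-zero, so the unsigned max over lanes is non-zero.
// vall_true: every lane is non-zero, so the unsigned min over lanes is non-zero.
fn vany_true(lanes: &[u8]) -> bool {
    lanes.iter().copied().max().unwrap_or(0) != 0
}

fn vall_true(lanes: &[u8]) -> bool {
    lanes.iter().copied().min().unwrap_or(1) != 0
}

fn main() {
    assert!(vany_true(&[0, 0, 7, 0]));
    assert!(!vall_true(&[0, 0, 7, 0]));
    assert!(vall_true(&[1, 2, 3, 4]));
}
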
Opcode::Shuffle
| Opcode::Vsplit
| Opcode::Vconcat
| Opcode::Vselect
| Opcode::VanyTrue
| Opcode::VallTrue
| Opcode::Insertlane
| Opcode::ScalarToVector
| Opcode::Swizzle
@ -1581,9 +1581,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(Opcode::Fmax, 64) => FPUOp2::Max64,
_ => panic!("Unknown op/bits combination"),
};
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::FpuRRR { fpu_op, rd, rn, rm });
}
@ -1602,8 +1602,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"),
_ => panic!("Unknown op/bits combination"),
};
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::FpuRR { fpu_op, rd, rn });
}
@ -1620,8 +1620,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(Opcode::Nearest, 64) => FpuRoundMode::Nearest64,
_ => panic!("Unknown op/bits combination"),
};
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::FpuRound { op, rd, rn });
}
@ -1632,10 +1632,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
64 => FPUOp3::MAdd64,
_ => panic!("Unknown op size"),
};
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let ra = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ra = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::FpuRRRR {
fpu_op,
rn,
@ -1658,9 +1658,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty = ctx.output_ty(insn, 0);
let bits = ty_bits(ty) as u8;
assert!(bits == 32 || bits == 64);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let tmp = ctx.alloc_tmp(RegClass::V128, F64);
// Copy LHS to rd.
@ -1699,8 +1699,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => panic!("Unknown input/output-bits combination"),
};
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
// First, check the output: it's important to carry out the NaN conversion before the
// in-bounds conversion, per wasm semantics.
@ -1842,8 +1842,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(true, 64) => NarrowValueMode::SignExtend64,
_ => panic!("Unknown input size"),
};
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::IntToFpu { op, rd, rn });
}
@ -1853,8 +1853,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let out_ty = ctx.output_ty(insn, 0);
let out_bits = ty_bits(out_ty);
let out_signed = op == Opcode::FcvtToSintSat;
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = output_to_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
@ -1991,9 +1991,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Now handle the iadd as above, except use an AddS opcode that sets
// flags.
let rd = output_to_reg(ctx, outputs[0]);
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
let alu_op = choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64);
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
@ -2069,6 +2069,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
panic!("x86-specific opcode in supposedly arch-neutral IR!");
}
Opcode::Iabs => unimplemented!(),
Opcode::AvgRound => unimplemented!(),
Opcode::TlsValue => unimplemented!(),
}
@ -2139,7 +2140,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
kind: CondBrKind::Cond(cond),
});
} else {
let rt = input_to_reg(
let rt = put_input_in_reg(
ctx,
InsnInput {
insn: branches[0],
@ -2173,7 +2174,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
(false, true) => NarrowValueMode::SignExtend64,
(false, false) => NarrowValueMode::ZeroExtend64,
};
let rn = input_to_reg(
let rn = put_input_in_reg(
ctx,
InsnInput {
insn: branches[0],
@ -2181,7 +2182,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
},
narrow_mode,
);
let rm = input_to_rse_imm12(
let rm = put_input_in_rse_imm12(
ctx,
InsnInput {
insn: branches[0],
@ -2220,7 +2221,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
} else {
// If the ifcmp result is actually placed in a
// register, we need to move it back into the flags.
let rn = input_to_reg(ctx, flag_input, NarrowValueMode::None);
let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
ctx.emit(Inst::MovToNZCV { rn });
ctx.emit(Inst::CondBr {
taken,
@ -2248,7 +2249,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
} else {
// If the ffcmp result is actually placed in a
// register, we need to move it back into the flags.
let rn = input_to_reg(ctx, flag_input, NarrowValueMode::None);
let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
ctx.emit(Inst::MovToNZCV { rn });
ctx.emit(Inst::CondBr {
taken,
@ -2294,7 +2295,7 @@ pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
needed_space: 4 * (6 + jt_size) as CodeOffset,
});
let ridx = input_to_reg(
let ridx = put_input_in_reg(
ctx,
InsnInput {
insn: branches[0],


@ -150,6 +150,7 @@ pub enum LookupError {
/// Builder for a `TargetIsa`.
/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`.
#[derive(Clone)]
pub struct Builder {
triple: Triple,
setup: settings::Builder,

File diff suppressed because it is too large


@ -3,20 +3,25 @@
use std::fmt;
use std::string::{String, ToString};
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector};
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper};
use crate::ir::condcodes::IntCC;
use crate::machinst::*;
use super::regs::show_ireg_sized;
use super::{
regs::{self, show_ireg_sized},
EmitState,
};
/// A Memory Address. These denote a 64-bit value only.
/// A possible addressing mode (amode) that can be used in instructions.
/// These denote a 64-bit value only.
#[derive(Clone)]
pub(crate) enum Addr {
pub enum Amode {
/// Immediate sign-extended and a Register.
IR { simm32: u32, base: Reg },
ImmReg { simm32: u32, base: Reg },
/// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
IRRS {
ImmRegRegShift {
simm32: u32,
base: Reg,
index: Reg,
@ -24,19 +29,17 @@ pub(crate) enum Addr {
},
}
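
For orientation, the address an amode denotes can be written out directly. A self-contained sketch (values and helper names are illustrative, not the crate's API):

// Effective address of ImmRegRegShift, with the registers replaced by values:
// sign_extend_32_to_64(simm32) + base + (index << shift).
fn effective_addr(simm32: u32, base: u64, index: u64, shift: u8) -> u64 {
    assert!(shift <= 3);
    base.wrapping_add(index << shift)
        .wrapping_add(simm32 as i32 as i64 as u64)
}

fn main() {
    // 16(%rdi,%rsi,8) with %rdi = 0x1000 and %rsi = 2 gives 0x1020.
    assert_eq!(effective_addr(16, 0x1000, 2, 3), 0x1020);
}
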
impl Addr {
// Constructors.
impl Amode {
pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
debug_assert!(base.get_class() == RegClass::I64);
Self::IR { simm32, base }
Self::ImmReg { simm32, base }
}
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
debug_assert!(base.get_class() == RegClass::I64);
debug_assert!(index.get_class() == RegClass::I64);
debug_assert!(shift <= 3);
Addr::IRRS {
Self::ImmRegRegShift {
simm32,
base,
index,
@ -47,15 +50,10 @@ impl Addr {
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
Addr::IR { simm32: _, base } => {
Amode::ImmReg { base, .. } => {
collector.add_use(*base);
}
Addr::IRRS {
simm32: _,
base,
index,
shift: _,
} => {
Amode::ImmRegRegShift { base, index, .. } => {
collector.add_use(*base);
collector.add_use(*index);
}
@ -63,11 +61,13 @@ impl Addr {
}
}
impl ShowWithRRU for Addr {
impl ShowWithRRU for Amode {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
Addr::IR { simm32, base } => format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)),
Addr::IRRS {
Amode::ImmReg { simm32, base } => {
format!("{}({})", *simm32 as i32, base.show_rru(mb_rru))
}
Amode::ImmRegRegShift {
simm32,
base,
index,
@ -83,51 +83,119 @@ impl ShowWithRRU for Addr {
}
}
/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
/// the lower 8 or 16 bits of `simm32` is relevant. In the 64-bit case, the value denoted by
/// `simm32` is its sign-extension out to 64 bits.
/// A Memory Address. These denote a 64-bit value only.
/// Used for usual addressing modes as well as addressing modes used during compilation, when the
/// moving SP offset is not known.
#[derive(Clone)]
pub(crate) enum RMI {
R { reg: Reg },
M { addr: Addr },
I { simm32: u32 },
pub enum SyntheticAmode {
/// A real amode.
Real(Amode),
/// A (virtual) offset to the "nominal SP" value, which will be recomputed as we push and pop
/// within the function.
NominalSPOffset { simm32: u32 },
}
impl RMI {
// Constructors
pub(crate) fn reg(reg: Reg) -> RMI {
debug_assert!(reg.get_class() == RegClass::I64);
RMI::R { reg }
}
pub(crate) fn mem(addr: Addr) -> RMI {
RMI::M { addr }
}
pub(crate) fn imm(simm32: u32) -> RMI {
RMI::I { simm32 }
impl SyntheticAmode {
pub(crate) fn nominal_sp_offset(simm32: u32) -> Self {
SyntheticAmode::NominalSPOffset { simm32 }
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
RMI::R { reg } => collector.add_use(*reg),
RMI::M { addr } => addr.get_regs_as_uses(collector),
RMI::I { simm32: _ } => {}
SyntheticAmode::Real(addr) => addr.get_regs_as_uses(collector),
SyntheticAmode::NominalSPOffset { .. } => {
// Nothing to do; the base is SP and isn't involved in regalloc.
}
}
}
pub(crate) fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
match self {
SyntheticAmode::Real(addr) => addr.map_uses(map),
SyntheticAmode::NominalSPOffset { .. } => {
// Nothing to do.
}
}
}
pub(crate) fn finalize(&self, state: &mut EmitState) -> Amode {
match self {
SyntheticAmode::Real(addr) => addr.clone(),
SyntheticAmode::NominalSPOffset { simm32 } => {
let off = *simm32 as i64 + state.virtual_sp_offset;
// TODO will require a sequence of add etc.
assert!(
off <= u32::max_value() as i64,
"amode finalize: add sequence NYI"
);
Amode::imm_reg(off as u32, regs::rsp())
}
}
}
}
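
A minimal self-contained sketch of the nominal-SP idea behind finalize above (names here are illustrative; only virtual_sp_offset corresponds to the field used in the diff): a frame-relative offset becomes a real %rsp displacement by adding the SP adjustment tracked during emission.

struct State {
    virtual_sp_offset: i64,
}

// Mirrors the NominalSPOffset arm of finalize: offset plus current adjustment.
fn finalize_nominal_sp(simm32: u32, state: &State) -> u32 {
    let off = simm32 as i64 + state.virtual_sp_offset;
    assert!(off >= 0 && off <= u32::MAX as i64, "add sequence NYI");
    off as u32 // becomes the displacement in off(%rsp)
}

fn main() {
    // After 16 bytes of pushes have been accounted for:
    let state = State { virtual_sp_offset: 16 };
    assert_eq!(finalize_nominal_sp(8, &state), 24);
}
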
impl ShowWithRRU for RMI {
impl Into<SyntheticAmode> for Amode {
fn into(self) -> SyntheticAmode {
SyntheticAmode::Real(self)
}
}
impl ShowWithRRU for SyntheticAmode {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
SyntheticAmode::Real(addr) => addr.show_rru(mb_rru),
SyntheticAmode::NominalSPOffset { simm32 } => {
format!("rsp({} + virtual offset)", *simm32 as i32)
}
}
}
}
/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
/// the lower 8 or 16 bits of `simm32` is relevant. In the 64-bit case, the value denoted by
/// `simm32` is its sign-extension out to 64 bits.
#[derive(Clone)]
pub enum RegMemImm {
Reg { reg: Reg },
Mem { addr: SyntheticAmode },
Imm { simm32: u32 },
}
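
The width rules in the doc comment above, spelled out concretely in a small illustrative sketch (not the crate's API):

// How a single simm32 is read at each operand width.
fn imm_as_u64(simm32: u32) -> u64 { simm32 as i32 as i64 as u64 } // sign-extended
fn imm_as_u16(simm32: u32) -> u16 { simm32 as u16 }               // low 16 bits
fn imm_as_u8(simm32: u32) -> u8 { simm32 as u8 }                  // low 8 bits

fn main() {
    assert_eq!(imm_as_u64(0xFFFF_FFFF), u64::MAX); // -1 stays -1
    assert_eq!(imm_as_u16(0x1234_5678), 0x5678);
    assert_eq!(imm_as_u8(0x1234_5678), 0x78);
}
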
impl RegMemImm {
pub(crate) fn reg(reg: Reg) -> Self {
debug_assert!(reg.get_class() == RegClass::I64);
Self::Reg { reg }
}
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
Self::Mem { addr: addr.into() }
}
pub(crate) fn imm(simm32: u32) -> Self {
Self::Imm { simm32 }
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
Self::Reg { reg } => collector.add_use(*reg),
Self::Mem { addr } => addr.get_regs_as_uses(collector),
Self::Imm { simm32: _ } => {}
}
}
}
impl ShowWithRRU for RegMemImm {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
}
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
match self {
RMI::R { reg } => show_ireg_sized(*reg, mb_rru, size),
RMI::M { addr } => addr.show_rru(mb_rru),
RMI::I { simm32 } => format!("${}", *simm32 as i32),
Self::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
Self::Mem { addr } => addr.show_rru(mb_rru),
Self::Imm { simm32 } => format!("${}", *simm32 as i32),
}
}
}
@ -135,48 +203,45 @@ impl ShowWithRRU for RMI {
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
/// 32 or 64 bit value.
#[derive(Clone)]
pub(crate) enum RM {
R { reg: Reg },
M { addr: Addr },
pub enum RegMem {
Reg { reg: Reg },
Mem { addr: SyntheticAmode },
}
impl RM {
// Constructors.
impl RegMem {
pub(crate) fn reg(reg: Reg) -> Self {
debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128);
RM::R { reg }
Self::Reg { reg }
}
pub(crate) fn mem(addr: Addr) -> Self {
RM::M { addr }
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
Self::Mem { addr: addr.into() }
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
RM::R { reg } => collector.add_use(*reg),
RM::M { addr } => addr.get_regs_as_uses(collector),
RegMem::Reg { reg } => collector.add_use(*reg),
RegMem::Mem { addr } => addr.get_regs_as_uses(collector),
}
}
}
impl ShowWithRRU for RM {
impl ShowWithRRU for RegMem {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
}
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
match self {
RM::R { reg } => show_ireg_sized(*reg, mb_rru, size),
RM::M { addr } => addr.show_rru(mb_rru),
RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
RegMem::Mem { addr } => addr.show_rru(mb_rru),
}
}
}
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
#[derive(Clone, PartialEq)]
pub enum RMI_R_Op {
pub enum AluRmiROpcode {
Add,
Sub,
And,
@ -186,89 +251,186 @@ pub enum RMI_R_Op {
Mul,
}
impl RMI_R_Op {
pub(crate) fn to_string(&self) -> String {
impl fmt::Debug for AluRmiROpcode {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let name = match self {
AluRmiROpcode::Add => "add",
AluRmiROpcode::Sub => "sub",
AluRmiROpcode::And => "and",
AluRmiROpcode::Or => "or",
AluRmiROpcode::Xor => "xor",
AluRmiROpcode::Mul => "imul",
};
write!(fmt, "{}", name)
}
}
impl ToString for AluRmiROpcode {
fn to_string(&self) -> String {
format!("{:?}", self)
}
}
pub(crate) enum InstructionSet {
SSE,
SSE2,
SSE41,
}
/// Some scalar SSE operations requiring 2 operands r/m and r.
/// TODO: Below only includes scalar operations. To be seen if packed will be added here.
#[derive(Clone, PartialEq)]
pub enum SseOpcode {
Addss,
Addsd,
Andps,
Andnps,
Comiss,
Comisd,
Cmpss,
Cmpsd,
Cvtsd2ss,
Cvtsd2si,
Cvtsi2ss,
Cvtsi2sd,
Cvtss2si,
Cvtss2sd,
Cvttss2si,
Cvttsd2si,
Divss,
Divsd,
Insertps,
Maxss,
Maxsd,
Minss,
Minsd,
Movaps,
Movd,
Movss,
Movsd,
Mulss,
Mulsd,
Orps,
Rcpss,
Roundss,
Roundsd,
Rsqrtss,
Sqrtss,
Sqrtsd,
Subss,
Subsd,
Ucomiss,
Ucomisd,
}
impl SseOpcode {
/// Which `InstructionSet` is the first supporting this opcode?
pub(crate) fn available_from(&self) -> InstructionSet {
use InstructionSet::*;
match self {
RMI_R_Op::Add => "add".to_string(),
RMI_R_Op::Sub => "sub".to_string(),
RMI_R_Op::And => "and".to_string(),
RMI_R_Op::Or => "or".to_string(),
RMI_R_Op::Xor => "xor".to_string(),
RMI_R_Op::Mul => "imul".to_string(),
SseOpcode::Addss
| SseOpcode::Andps
| SseOpcode::Andnps
| SseOpcode::Cvtsi2ss
| SseOpcode::Cvtss2si
| SseOpcode::Cvttss2si
| SseOpcode::Divss
| SseOpcode::Maxss
| SseOpcode::Movaps
| SseOpcode::Minss
| SseOpcode::Movss
| SseOpcode::Mulss
| SseOpcode::Orps
| SseOpcode::Rcpss
| SseOpcode::Rsqrtss
| SseOpcode::Subss
| SseOpcode::Ucomiss
| SseOpcode::Sqrtss
| SseOpcode::Comiss
| SseOpcode::Cmpss => SSE,
SseOpcode::Addsd
| SseOpcode::Cvtsd2ss
| SseOpcode::Cvtsd2si
| SseOpcode::Cvtsi2sd
| SseOpcode::Cvtss2sd
| SseOpcode::Cvttsd2si
| SseOpcode::Divsd
| SseOpcode::Maxsd
| SseOpcode::Minsd
| SseOpcode::Movd
| SseOpcode::Movsd
| SseOpcode::Mulsd
| SseOpcode::Sqrtsd
| SseOpcode::Subsd
| SseOpcode::Ucomisd
| SseOpcode::Comisd
| SseOpcode::Cmpsd => SSE2,
SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
}
}
/// Returns the src operand size for an instruction
pub(crate) fn src_size(&self) -> u8 {
match self {
SseOpcode::Movd => 4,
_ => 8,
}
}
}
impl fmt::Debug for RMI_R_Op {
impl fmt::Debug for SseOpcode {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
let name = match self {
SseOpcode::Addss => "addss",
SseOpcode::Addsd => "addsd",
SseOpcode::Andps => "andps",
SseOpcode::Andnps => "andnps",
SseOpcode::Comiss => "comiss",
SseOpcode::Comisd => "comisd",
SseOpcode::Cvtsd2ss => "cvtsd2ss",
SseOpcode::Cvtsd2si => "cvtsd2si",
SseOpcode::Cvtsi2ss => "cvtsi2ss",
SseOpcode::Cvtsi2sd => "cvtsi2sd",
SseOpcode::Cvtss2si => "cvtss2si",
SseOpcode::Cvtss2sd => "cvtss2sd",
SseOpcode::Cvttss2si => "cvttss2si",
SseOpcode::Cvttsd2si => "cvttsd2si",
SseOpcode::Divss => "divss",
SseOpcode::Divsd => "divsd",
SseOpcode::Maxss => "maxss",
SseOpcode::Maxsd => "maxsd",
SseOpcode::Minss => "minss",
SseOpcode::Minsd => "minsd",
SseOpcode::Movaps => "movaps",
SseOpcode::Movd => "movd",
SseOpcode::Movss => "movss",
SseOpcode::Movsd => "movsd",
SseOpcode::Mulss => "mulss",
SseOpcode::Mulsd => "mulsd",
SseOpcode::Orps => "orps",
SseOpcode::Rcpss => "rcpss",
SseOpcode::Roundss => "roundss",
SseOpcode::Roundsd => "roundsd",
SseOpcode::Rsqrtss => "rsqrtss",
SseOpcode::Sqrtss => "sqrtss",
SseOpcode::Sqrtsd => "sqrtsd",
SseOpcode::Subss => "subss",
SseOpcode::Subsd => "subsd",
SseOpcode::Ucomiss => "ucomiss",
SseOpcode::Ucomisd => "ucomisd",
SseOpcode::Cmpss => "cmpss",
SseOpcode::Cmpsd => "cmpsd",
SseOpcode::Insertps => "insertps",
};
write!(fmt, "{}", name)
}
}
/// Some scalar SSE operations requiring 2 operands r/m and r
/// Each instruction is prefixed with the SSE version that introduced
/// the particular instructions.
/// TODO: Below only includes scalar operations. To be seen if packed will
/// be added here.
#[derive(Clone, PartialEq)]
pub enum SSE_Op {
SSE_Addss,
SSE2_Addsd,
SSE_Comiss,
SSE2_Comisd,
SSE2_Cvtsd2ss,
SSE2_Cvtsd2si,
SSE_Cvtsi2ss,
SSE2_Cvtsi2sd,
SSE_Cvtss2si,
SSE2_Cvtss2sd,
SSE_Cvttss2si,
SSE2_Cvttsd2si,
SSE_Divss,
SSE2_Divsd,
SSE_Maxss,
SSE2_Maxsd,
SSE_Minss,
SSE2_Minsd,
SSE_Movss,
SSE2_Movsd,
SSE_Mulss,
SSE2_Mulsd,
SSE_Rcpss,
SSE41_Roundss,
SSE41_Roundsd,
SSE_Rsqrtss,
SSE_Sqrtss,
SSE2_Sqrtsd,
SSE_Subss,
SSE2_Subsd,
SSE_Ucomiss,
SSE2_Ucomisd,
}
/// Some SSE operations requiring 3 operands i, r/m, and r
#[derive(Clone, PartialEq)]
pub enum SSE_RMI_Op {
SSE_Cmpss,
SSE2_Cmpsd,
SSE41_Insertps,
}
impl SSE_Op {
pub(crate) fn to_string(&self) -> String {
match self {
SSE_Op::SSE_Addss => "addss".to_string(),
SSE_Op::SSE_Subss => "subss".to_string(),
SSE_Op::SSE_Movss => "movss".to_string(),
SSE_Op::SSE2_Movsd => "movsd".to_string(),
_ => "unimplemented sse_op".to_string(),
}
}
}
impl fmt::Debug for SSE_Op {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
impl ToString for SseOpcode {
fn to_string(&self) -> String {
format!("{:?}", self)
}
}
@ -289,30 +451,37 @@ pub enum ExtMode {
}
impl ExtMode {
pub(crate) fn to_string(&self) -> String {
pub(crate) fn src_size(&self) -> u8 {
match self {
ExtMode::BL => "bl".to_string(),
ExtMode::BQ => "bq".to_string(),
ExtMode::WL => "wl".to_string(),
ExtMode::WQ => "wq".to_string(),
ExtMode::LQ => "lq".to_string(),
ExtMode::BL | ExtMode::BQ => 1,
ExtMode::WL | ExtMode::WQ => 2,
ExtMode::LQ => 4,
}
}
pub(crate) fn dst_size(&self) -> u8 {
match self {
ExtMode::BL => 4,
ExtMode::BQ => 8,
ExtMode::WL => 4,
ExtMode::WQ => 8,
ExtMode::LQ => 8,
ExtMode::BL | ExtMode::WL => 4,
ExtMode::BQ | ExtMode::WQ | ExtMode::LQ => 8,
}
}
}
impl fmt::Debug for ExtMode {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
let name = match self {
ExtMode::BL => "bl",
ExtMode::BQ => "bq",
ExtMode::WL => "wl",
ExtMode::WQ => "wq",
ExtMode::LQ => "lq",
};
write!(fmt, "{}", name)
}
}
impl ToString for ExtMode {
fn to_string(&self) -> String {
format!("{:?}", self)
}
}
@ -324,19 +493,20 @@ pub enum ShiftKind {
RightS,
}
impl ShiftKind {
pub(crate) fn to_string(&self) -> String {
match self {
ShiftKind::Left => "shl".to_string(),
ShiftKind::RightZ => "shr".to_string(),
ShiftKind::RightS => "sar".to_string(),
}
impl fmt::Debug for ShiftKind {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let name = match self {
ShiftKind::Left => "shl",
ShiftKind::RightZ => "shr",
ShiftKind::RightS => "sar",
};
write!(fmt, "{}", name)
}
}
impl fmt::Debug for ShiftKind {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
impl ToString for ShiftKind {
fn to_string(&self) -> String {
format!("{:?}", self)
}
}
@ -382,26 +552,24 @@ pub enum CC {
}
impl CC {
pub(crate) fn to_string(&self) -> String {
match self {
CC::O => "o".to_string(),
CC::NO => "no".to_string(),
CC::B => "b".to_string(),
CC::NB => "nb".to_string(),
CC::Z => "z".to_string(),
CC::NZ => "nz".to_string(),
CC::BE => "be".to_string(),
CC::NBE => "nbe".to_string(),
CC::S => "s".to_string(),
CC::NS => "ns".to_string(),
CC::L => "l".to_string(),
CC::NL => "nl".to_string(),
CC::LE => "le".to_string(),
CC::NLE => "nle".to_string(),
pub(crate) fn from_intcc(intcc: IntCC) -> Self {
match intcc {
IntCC::Equal => CC::Z,
IntCC::NotEqual => CC::NZ,
IntCC::SignedGreaterThanOrEqual => CC::NL,
IntCC::SignedGreaterThan => CC::NLE,
IntCC::SignedLessThanOrEqual => CC::LE,
IntCC::SignedLessThan => CC::L,
IntCC::UnsignedGreaterThanOrEqual => CC::NB,
IntCC::UnsignedGreaterThan => CC::NBE,
IntCC::UnsignedLessThanOrEqual => CC::BE,
IntCC::UnsignedLessThan => CC::B,
IntCC::Overflow => CC::O,
IntCC::NotOverflow => CC::NO,
}
}
pub(crate) fn invert(&self) -> CC {
pub(crate) fn invert(&self) -> Self {
match self {
CC::O => CC::NO,
CC::NO => CC::O,
@ -433,7 +601,29 @@ impl CC {
impl fmt::Debug for CC {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
let name = match self {
CC::O => "o",
CC::NO => "no",
CC::B => "b",
CC::NB => "nb",
CC::Z => "z",
CC::NZ => "nz",
CC::BE => "be",
CC::NBE => "nbe",
CC::S => "s",
CC::NS => "ns",
CC::L => "l",
CC::NL => "nl",
CC::LE => "le",
CC::NLE => "nle",
};
write!(fmt, "{}", name)
}
}
impl ToString for CC {
fn to_string(&self) -> String {
format!("{:?}", self)
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -1,19 +1,22 @@
//! Lowering rules for X64.
#![allow(dead_code)]
#![allow(non_snake_case)]
use log::trace;
use regalloc::{Reg, RegClass, Writable};
use smallvec::SmallVec;
use std::convert::TryFrom;
use crate::ir::condcodes::IntCC;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, Type};
use crate::ir::{condcodes::IntCC, InstructionData, Opcode, TrapCode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::result::CodegenResult;
use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::isa::x64::X64Backend;
@ -31,6 +34,20 @@ fn is_int_ty(ty: Type) -> bool {
}
}
fn is_bool_ty(ty: Type) -> bool {
match ty {
types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
_ => false,
}
}
fn is_float_ty(ty: Type) -> bool {
match ty {
types::F32 | types::F64 => true,
_ => false,
}
}
fn int_ty_is_64(ty: Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 => false,
@ -47,29 +64,17 @@ fn flt_ty_is_64(ty: Type) -> bool {
}
}
fn int_ty_to_sizeB(ty: Type) -> u8 {
match ty {
types::I8 => 1,
types::I16 => 2,
types::I32 => 4,
types::I64 => 8,
_ => panic!("ity_to_sizeB"),
}
fn iri_to_u64_imm(ctx: Ctx, inst: IRInst) -> Option<u64> {
ctx.get_constant(inst)
}
fn iri_to_u64_immediate<'a>(ctx: Ctx<'a>, iri: IRInst) -> Option<u64> {
let inst_data = ctx.data(iri);
if inst_data.opcode() == Opcode::Null {
Some(0)
} else {
match inst_data {
&InstructionData::UnaryImm { opcode: _, imm } => {
// Only has Into for i64; we use u64 elsewhere, so we cast.
let imm: i64 = imm.into();
Some(imm as u64)
}
_ => None,
}
fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
match data {
&InstructionData::Trap { code, .. }
| &InstructionData::CondTrap { code, .. }
| &InstructionData::IntCondTrap { code, .. }
| &InstructionData::FloatCondTrap { code, .. } => Some(code),
_ => None,
}
}
@ -86,131 +91,514 @@ fn inst_condcode(data: &InstructionData) -> IntCC {
}
}
fn intCC_to_x64_CC(cc: IntCC) -> CC {
match cc {
IntCC::Equal => CC::Z,
IntCC::NotEqual => CC::NZ,
IntCC::SignedGreaterThanOrEqual => CC::NL,
IntCC::SignedGreaterThan => CC::NLE,
IntCC::SignedLessThanOrEqual => CC::LE,
IntCC::SignedLessThan => CC::L,
IntCC::UnsignedGreaterThanOrEqual => CC::NB,
IntCC::UnsignedGreaterThan => CC::NBE,
IntCC::UnsignedLessThanOrEqual => CC::BE,
IntCC::UnsignedLessThan => CC::B,
IntCC::Overflow => CC::O,
IntCC::NotOverflow => CC::NO,
fn ldst_offset(data: &InstructionData) -> Option<i32> {
match data {
&InstructionData::Load { offset, .. }
| &InstructionData::StackLoad { offset, .. }
| &InstructionData::LoadComplex { offset, .. }
| &InstructionData::Store { offset, .. }
| &InstructionData::StackStore { offset, .. }
| &InstructionData::StoreComplex { offset, .. } => Some(offset.into()),
_ => None,
}
}
fn input_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, input: usize) -> Reg {
let inputs = ctx.get_input(iri, input);
/// Identifier for a particular input of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct InsnInput {
insn: IRInst,
input: usize,
}
/// Identifier for a particular output of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct InsnOutput {
insn: IRInst,
output: usize,
}
fn input_to_reg<'a>(ctx: Ctx<'a>, spec: InsnInput) -> Reg {
let inputs = ctx.get_input(spec.insn, spec.input);
ctx.use_input_reg(inputs);
inputs.reg
}
fn output_to_reg<'a>(ctx: Ctx<'a>, iri: IRInst, output: usize) -> Writable<Reg> {
ctx.get_output(iri, output)
/// Try to use an immediate for constant inputs, and a register otherwise.
/// TODO: handle memory as well!
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
let imm = ctx.get_input(spec.insn, spec.input).constant.and_then(|x| {
let as_u32 = x as u32;
let extended = as_u32 as u64;
// If the truncation and sign-extension don't change the value, use it.
if extended == x {
Some(as_u32)
} else {
None
}
});
match imm {
Some(x) => RegMemImm::imm(x),
None => RegMemImm::reg(input_to_reg(ctx, spec)),
}
}
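
The immediate-fits check used above, restated as a standalone helper. This sketch mirrors the code as written: a constant qualifies only if zero-extending its low 32 bits reproduces the original value.

fn fits_in_imm32(x: u64) -> Option<u32> {
    let as_u32 = x as u32;
    if as_u32 as u64 == x {
        Some(as_u32) // the low 32 bits are enough
    } else {
        None // fall back to a register
    }
}

fn main() {
    assert_eq!(fits_in_imm32(5), Some(5));
    assert_eq!(fits_in_imm32(0xFFFF_FFFF), Some(0xFFFF_FFFF));
    assert_eq!(fits_in_imm32(0x1_0000_0000), None);
}
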
fn output_to_reg<'a>(ctx: Ctx<'a>, spec: InsnOutput) -> Writable<Reg> {
ctx.get_output(spec.insn, spec.output)
}
//=============================================================================
// Top-level instruction lowering entry point, for one instruction.
/// Actually codegen an instruction's results into registers.
fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
let op = ctx.data(iri).opcode();
let ty = if ctx.num_outputs(iri) == 1 {
Some(ctx.output_ty(iri, 0))
fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) -> CodegenResult<()> {
let op = ctx.data(insn).opcode();
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
.map(|i| InsnInput { insn, input: i })
.collect();
let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
.map(|i| InsnOutput { insn, output: i })
.collect();
let ty = if outputs.len() > 0 {
Some(ctx.output_ty(insn, 0))
} else {
None
};
// This is all outstandingly feeble. TODO: much better!
match op {
Opcode::Iconst => {
if let Some(w64) = iri_to_u64_immediate(ctx, iri) {
if let Some(w64) = iri_to_u64_imm(ctx, insn) {
// Get exactly the bit pattern in 'w64' into the dest. No
// monkeying with sign extension etc.
let dstIs64 = w64 > 0xFFFF_FFFF;
let regD = output_to_reg(ctx, iri, 0);
ctx.emit(Inst::imm_r(dstIs64, w64, regD));
let dst_is_64 = w64 > 0xFFFF_FFFF;
let dst = output_to_reg(ctx, outputs[0]);
ctx.emit(Inst::imm_r(dst_is_64, w64, dst));
} else {
unimplemented!();
}
}
Opcode::Iadd | Opcode::Isub => {
let regD = output_to_reg(ctx, iri, 0);
let regL = input_to_reg(ctx, iri, 0);
let regR = input_to_reg(ctx, iri, 1);
let is64 = int_ty_is_64(ty.unwrap());
let how = if op == Opcode::Iadd {
RMI_R_Op::Add
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
// TODO For add, try to commute the operands if one is an immediate.
let is_64 = int_ty_is_64(ty.unwrap());
let alu_op = if op == Opcode::Iadd {
AluRmiROpcode::Add
} else {
RMI_R_Op::Sub
AluRmiROpcode::Sub
};
ctx.emit(Inst::mov_r_r(true, regL, regD));
ctx.emit(Inst::alu_rmi_r(is64, how, RMI::reg(regR), regD));
ctx.emit(Inst::mov_r_r(true, lhs, dst));
ctx.emit(Inst::alu_rmi_r(is_64, alu_op, rhs, dst));
}
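
The mov-then-alu pair above reflects the two-operand x86 form, in which the destination is also the left source. A scalar model, purely illustrative:

// dst starts as a copy of lhs and is updated in place,
// matching `mov lhs, dst; add/sub rhs, dst`.
fn lower_iadd(lhs: i64, rhs: i64) -> i64 {
    let mut dst = lhs;           // mov_r_r(true, lhs, dst)
    dst = dst.wrapping_add(rhs); // alu_rmi_r(is_64, Add, rhs, dst)
    dst
}

fn main() {
    assert_eq!(lower_iadd(40, 2), 42);
}
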
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
// TODO: implement imm shift value into insn
let tySL = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0); // should be the same as tySL
let regSL = input_to_reg(ctx, iri, 0);
let regSR = input_to_reg(ctx, iri, 1);
let regD = output_to_reg(ctx, iri, 0);
if tyD == tySL && (tyD == types::I32 || tyD == types::I64) {
let how = match op {
Opcode::Ishl => ShiftKind::Left,
Opcode::Ushr => ShiftKind::RightZ,
Opcode::Sshr => ShiftKind::RightS,
_ => unreachable!(),
};
let is64 = tyD == types::I64;
let r_rcx = regs::rcx();
let w_rcx = Writable::<Reg>::from_reg(r_rcx);
ctx.emit(Inst::mov_r_r(true, regSL, regD));
ctx.emit(Inst::mov_r_r(true, regSR, w_rcx));
ctx.emit(Inst::shift_r(is64, how, None /*%cl*/, regD));
let dst_ty = ctx.output_ty(insn, 0);
assert_eq!(ctx.input_ty(insn, 0), dst_ty);
assert!(dst_ty == types::I32 || dst_ty == types::I64);
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
let shift_kind = match op {
Opcode::Ishl => ShiftKind::Left,
Opcode::Ushr => ShiftKind::RightZ,
Opcode::Sshr => ShiftKind::RightS,
_ => unreachable!(),
};
let is_64 = dst_ty == types::I64;
let w_rcx = Writable::from_reg(regs::rcx());
ctx.emit(Inst::mov_r_r(true, lhs, dst));
ctx.emit(Inst::mov_r_r(true, rhs, w_rcx));
ctx.emit(Inst::shift_r(is_64, shift_kind, None /*%cl*/, dst));
}
Opcode::Uextend
| Opcode::Sextend
| Opcode::Bint
| Opcode::Breduce
| Opcode::Bextend
| Opcode::Ireduce => {
let src_ty = ctx.input_ty(insn, 0);
let dst_ty = ctx.output_ty(insn, 0);
// TODO: if the source operand is a load, incorporate that.
let src = input_to_reg(ctx, inputs[0]);
let dst = output_to_reg(ctx, outputs[0]);
let ext_mode = match (src_ty.bits(), dst_ty.bits()) {
(1, 32) | (8, 32) => ExtMode::BL,
(1, 64) | (8, 64) => ExtMode::BQ,
(16, 32) => ExtMode::WL,
(16, 64) => ExtMode::WQ,
(32, 64) => ExtMode::LQ,
_ => unreachable!(
"unexpected extension kind from {:?} to {:?}",
src_ty, dst_ty
),
};
if op == Opcode::Sextend {
ctx.emit(Inst::movsx_rm_r(ext_mode, RegMem::reg(src), dst));
} else {
unimplemented!()
// All of these other opcodes are simply a move from a zero-extended source. Here
// is why this works, in each case:
//
// - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we
// merely need to zero-extend here.
//
// - Breduce, Bextend: changing width of a boolean. We represent a
// bool as a 0 or 1, so again, this is a zero-extend / no-op.
//
// - Ireduce: changing width of an integer. Smaller ints are stored
// with undefined high-order bits, so we can simply do a copy.
ctx.emit(Inst::movzx_rm_r(ext_mode, RegMem::reg(src), dst));
}
}
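
The bool cases in the comment above come down to the 0-or-1 representation making every width change a zero-extension. A tiny illustrative sketch (hypothetical helper names):

fn bint(b: bool) -> u64 { b as u64 }     // bool to int: already 0 or 1
fn bextend(b8: u8) -> u64 { b8 as u64 }  // widening a bool stored as 0 or 1

fn main() {
    assert_eq!(bint(true), 1);
    assert_eq!(bextend(0), 0);
}
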
Opcode::Uextend | Opcode::Sextend => {
// TODO: this is all extremely lame, all because Mov{ZX,SX}_M_R
// don't accept a register source operand. They should be changed
// so as to have _RM_R form.
// TODO2: if the source operand is a load, incorporate that.
let isZX = op == Opcode::Uextend;
let tyS = ctx.input_ty(iri, 0);
let tyD = ctx.output_ty(iri, 0);
let regS = input_to_reg(ctx, iri, 0);
let regD = output_to_reg(ctx, iri, 0);
ctx.emit(Inst::mov_r_r(true, regS, regD));
match (tyS, tyD, isZX) {
(types::I8, types::I64, false) => {
ctx.emit(Inst::shift_r(true, ShiftKind::Left, Some(56), regD));
ctx.emit(Inst::shift_r(true, ShiftKind::RightS, Some(56), regD));
}
_ => unimplemented!(),
}
Opcode::Icmp => {
let condcode = inst_condcode(ctx.data(insn));
let cc = CC::from_intcc(condcode);
let ty = ctx.input_ty(insn, 0);
// TODO Try to commute the operands (and invert the condition) if one is an immediate.
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
// Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
// us dst - src at the machine instruction level, so invert operands.
ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
ctx.emit(Inst::setcc(cc, dst));
}
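// Illustrative note, not part of the vendored sources: for `v2 = icmp slt v0, v1`
// the compare above is emitted with v0 (lhs) as the machine destination operand
// and v1 (rhs) as the source, so the flags reflect v0 - v1 as Cranelift's `icmp`
// requires; `setcc` then materializes the boolean result into v2.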
Opcode::FallthroughReturn | Opcode::Return => {
for i in 0..ctx.num_inputs(iri) {
let src_reg = input_to_reg(ctx, iri, i);
for i in 0..ctx.num_inputs(insn) {
let src_reg = input_to_reg(ctx, inputs[i]);
let retval_reg = ctx.retval(i);
if src_reg.get_class() == RegClass::I64 {
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
} else if src_reg.get_class() == RegClass::V128 {
ctx.emit(Inst::xmm_r_r(SSE_Op::SSE2_Movsd, src_reg, retval_reg));
ctx.emit(Inst::xmm_mov_rm_r(
SseOpcode::Movsd,
RegMem::reg(src_reg),
retval_reg,
));
}
}
// N.B.: the Ret itself is generated by the ABI.
}
Opcode::Call | Opcode::CallIndirect => {
let loc = ctx.srcloc(insn);
let (mut abi, inputs) = match op {
Opcode::Call => {
let (extname, dist) = ctx.call_target(insn).unwrap();
let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(
X64ABICall::from_func(sig, &extname, dist, loc)?,
&inputs[..],
)
}
Opcode::CallIndirect => {
let ptr = input_to_reg(ctx, inputs[0]);
let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() - 1 == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(X64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
}
_ => unreachable!(),
};
abi.emit_stack_pre_adjust(ctx);
assert!(inputs.len() == abi.num_args());
for (i, input) in inputs.iter().enumerate() {
let arg_reg = input_to_reg(ctx, *input);
abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
}
abi.emit_call(ctx);
for (i, output) in outputs.iter().enumerate() {
let retval_reg = output_to_reg(ctx, *output);
abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
}
abi.emit_stack_post_adjust(ctx);
}
Opcode::Debugtrap => {
ctx.emit(Inst::Hlt);
}
Opcode::Trap => {
let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap());
ctx.emit(Inst::Ud2 { trap_info })
}
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
let is_64 = flt_ty_is_64(ty.unwrap());
if !is_64 {
let sse_op = match op {
Opcode::Fadd => SseOpcode::Addss,
Opcode::Fsub => SseOpcode::Subss,
Opcode::Fmul => SseOpcode::Mulss,
Opcode::Fdiv => SseOpcode::Divss,
// TODO Fmax, Fmin.
_ => unimplemented!(),
};
ctx.emit(Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(lhs), dst));
ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
} else {
unimplemented!("unimplemented lowering for opcode {:?}", op);
}
}
Opcode::Fcopysign => {
let dst = output_to_reg(ctx, outputs[0]);
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg(ctx, inputs[1]);
if !flt_ty_is_64(ty.unwrap()) {
// movabs 0x8000_0000, tmp_gpr1
// movd tmp_gpr1, tmp_xmm1
// movaps tmp_xmm1, dst
// andnps src_1, dst
// movss src_2, tmp_xmm2
// andps tmp_xmm1, tmp_xmm2
// orps tmp_xmm2, dst
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, I32);
let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, F32);
let tmp_xmm2 = ctx.alloc_tmp(RegClass::V128, F32);
ctx.emit(Inst::imm_r(true, 0x8000_0000, tmp_gpr1));
ctx.emit(Inst::xmm_mov_rm_r(
SseOpcode::Movd,
RegMem::reg(tmp_gpr1.to_reg()),
tmp_xmm1,
));
ctx.emit(Inst::xmm_mov_rm_r(
SseOpcode::Movaps,
RegMem::reg(tmp_xmm1.to_reg()),
dst,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(lhs), dst));
ctx.emit(Inst::xmm_mov_rm_r(
SseOpcode::Movss,
RegMem::reg(rhs),
tmp_xmm2,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Andps,
RegMem::reg(tmp_xmm1.to_reg()),
tmp_xmm2,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Orps,
RegMem::reg(tmp_xmm2.to_reg()),
dst,
));
} else {
unimplemented!("{:?} for non 32-bit destination is not supported", op);
}
}
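// Illustrative note, not part of the vendored sources: in the sequence above
// tmp_xmm1 ends up holding the sign-bit mask 0x8000_0000, `andnps` leaves dst
// with the magnitude bits of lhs (sign cleared), `andps` isolates the sign bit
// of rhs in tmp_xmm2, and the final `orps` combines the two, so dst = |lhs|
// carrying the sign of rhs, which is exactly `fcopysign`.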
Opcode::Load
| Opcode::Uload8
| Opcode::Sload8
| Opcode::Uload16
| Opcode::Sload16
| Opcode::Uload32
| Opcode::Sload32
| Opcode::LoadComplex
| Opcode::Uload8Complex
| Opcode::Sload8Complex
| Opcode::Uload16Complex
| Opcode::Sload16Complex
| Opcode::Uload32Complex
| Opcode::Sload32Complex => {
let offset = ldst_offset(ctx.data(insn)).unwrap();
let elem_ty = match op {
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
types::I8
}
Opcode::Sload16
| Opcode::Uload16
| Opcode::Sload16Complex
| Opcode::Uload16Complex => types::I16,
Opcode::Sload32
| Opcode::Uload32
| Opcode::Sload32Complex
| Opcode::Uload32Complex => types::I32,
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
_ => unimplemented!(),
};
let ext_mode = match elem_ty.bytes() {
1 => Some(ExtMode::BQ),
2 => Some(ExtMode::WQ),
4 => Some(ExtMode::LQ),
_ => None,
};
let sign_extend = match op {
Opcode::Sload8
| Opcode::Sload8Complex
| Opcode::Sload16
| Opcode::Sload16Complex
| Opcode::Sload32
| Opcode::Sload32Complex => true,
_ => false,
};
let is_float = is_float_ty(elem_ty);
let addr = match op {
Opcode::Load
| Opcode::Uload8
| Opcode::Sload8
| Opcode::Uload16
| Opcode::Sload16
| Opcode::Uload32
| Opcode::Sload32 => {
assert!(inputs.len() == 1, "only one input for load operands");
let base = input_to_reg(ctx, inputs[0]);
Amode::imm_reg(offset as u32, base)
}
Opcode::LoadComplex
| Opcode::Uload8Complex
| Opcode::Sload8Complex
| Opcode::Uload16Complex
| Opcode::Sload16Complex
| Opcode::Uload32Complex
| Opcode::Sload32Complex => {
assert!(
inputs.len() == 2,
"can't handle more than two inputs in complex load"
);
let base = input_to_reg(ctx, inputs[0]);
let index = input_to_reg(ctx, inputs[1]);
let shift = 0;
Amode::imm_reg_reg_shift(offset as u32, base, index, shift)
}
_ => unreachable!(),
};
let dst = output_to_reg(ctx, outputs[0]);
match (sign_extend, is_float) {
(true, false) => {
// The load is sign-extended only when the output size is lower than 64 bits,
// so ext-mode is defined in this case.
ctx.emit(Inst::movsx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst));
}
(false, false) => {
if elem_ty.bytes() == 8 {
// Use a plain load.
ctx.emit(Inst::mov64_m_r(addr, dst))
} else {
// Use a zero-extended load.
ctx.emit(Inst::movzx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst))
}
}
(_, true) => {
ctx.emit(match elem_ty {
F32 => Inst::xmm_mov_rm_r(SseOpcode::Movd, RegMem::mem(addr), dst),
_ => unimplemented!("FP load not 32-bit"),
});
}
}
}
Opcode::Store
| Opcode::Istore8
| Opcode::Istore16
| Opcode::Istore32
| Opcode::StoreComplex
| Opcode::Istore8Complex
| Opcode::Istore16Complex
| Opcode::Istore32Complex => {
let offset = ldst_offset(ctx.data(insn)).unwrap();
let elem_ty = match op {
Opcode::Istore8 | Opcode::Istore8Complex => types::I8,
Opcode::Istore16 | Opcode::Istore16Complex => types::I16,
Opcode::Istore32 | Opcode::Istore32Complex => types::I32,
Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0),
_ => unreachable!(),
};
let is_float = is_float_ty(elem_ty);
let addr = match op {
Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
assert!(
inputs.len() == 2,
"only one input for store memory operands"
);
let base = input_to_reg(ctx, inputs[1]);
// TODO sign?
Amode::imm_reg(offset as u32, base)
}
Opcode::StoreComplex
| Opcode::Istore8Complex
| Opcode::Istore16Complex
| Opcode::Istore32Complex => {
assert!(
inputs.len() == 3,
"can't handle more than two inputs in complex load"
);
let base = input_to_reg(ctx, inputs[1]);
let index = input_to_reg(ctx, inputs[2]);
let shift = 0;
Amode::imm_reg_reg_shift(offset as u32, base, index, shift)
}
_ => unreachable!(),
};
let src = input_to_reg(ctx, inputs[0]);
if is_float {
ctx.emit(match elem_ty {
F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr),
_ => unimplemented!("FP store not 32-bit"),
});
} else {
ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr));
}
}
Opcode::StackAddr => {
let (stack_slot, offset) = match *ctx.data(insn) {
InstructionData::StackLoad {
opcode: Opcode::StackAddr,
stack_slot,
offset,
} => (stack_slot, offset),
_ => unreachable!(),
};
let dst = output_to_reg(ctx, outputs[0]);
let offset: i32 = offset.into();
println!("stackslot_addr: {:?} @ off{}", stack_slot, offset);
let inst = ctx
.abi()
.stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst);
ctx.emit(inst);
}
Opcode::IaddImm
| Opcode::ImulImm
| Opcode::UdivImm
@ -240,25 +628,10 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
| Opcode::SshrImm => {
panic!("ALU+imm and ALU+carry ops should not appear here!");
}
Opcode::Fadd | Opcode::Fsub => {
let regD = output_to_reg(ctx, iri, 0);
let regL = input_to_reg(ctx, iri, 0);
let regR = input_to_reg(ctx, iri, 1);
let is64 = flt_ty_is_64(ty.unwrap());
if !is64 {
let inst = if op == Opcode::Fadd {
SSE_Op::SSE_Addss
} else {
SSE_Op::SSE_Subss
};
ctx.emit(Inst::xmm_r_r(SSE_Op::SSE_Movss, regL, regD));
ctx.emit(Inst::xmm_rm_r(inst, RM::reg(regR), regD));
} else {
unimplemented!("unimplemented lowering for opcode {:?}", op);
}
}
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
}
Ok(())
}
//=============================================================================
@ -268,8 +641,7 @@ impl LowerBackend for X64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_insn_to_regs(ctx, ir_inst);
Ok(())
lower_insn_to_regs(ctx, ir_inst)
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(
@ -287,60 +659,79 @@ impl LowerBackend for X64Backend {
// verifier pass.
assert!(branches.len() <= 2);
let mut unimplemented = false;
if branches.len() == 2 {
// Must be a conditional branch followed by an unconditional branch.
let op0 = ctx.data(branches[0]).opcode();
let op1 = ctx.data(branches[1]).opcode();
println!(
"QQQQ lowering two-branch group: opcodes are {:?} and {:?}",
op0, op1
trace!(
"lowering two-branch group: opcodes are {:?} and {:?}",
op0,
op1
);
assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
let taken = BranchTarget::Label(targets[0]);
let not_taken = match op1 {
Opcode::Jump => BranchTarget::Label(targets[1]),
Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
_ => unreachable!(), // assert above.
};
match op0 {
Opcode::Brz | Opcode::Brnz => {
let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) {
let rS = input_to_reg(ctx, branches[0], 0);
let src_ty = ctx.input_ty(branches[0], 0);
if is_int_ty(src_ty) || is_bool_ty(src_ty) {
let src = input_to_reg(
ctx,
InsnInput {
insn: branches[0],
input: 0,
},
);
let cc = match op0 {
Opcode::Brz => CC::Z,
Opcode::Brnz => CC::NZ,
_ => unreachable!(),
};
let sizeB = int_ty_to_sizeB(tyS);
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::imm(0), rS));
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
let size_bytes = src_ty.bytes() as u8;
ctx.emit(Inst::cmp_rmi_r(size_bytes, RegMemImm::imm(0), src));
ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
} else {
unimplemented = true;
unimplemented!("brz/brnz with non-int type {:?}", src_ty);
}
}
Opcode::BrIcmp => {
let tyS = ctx.input_ty(branches[0], 0);
if is_int_ty(tyS) {
let rSL = input_to_reg(ctx, branches[0], 0);
let rSR = input_to_reg(ctx, branches[0], 1);
let cc = intCC_to_x64_CC(inst_condcode(ctx.data(branches[0])));
let sizeB = int_ty_to_sizeB(tyS);
// FIXME verify rSR vs rSL ordering
ctx.emit(Inst::cmp_rmi_r(sizeB, RMI::reg(rSR), rSL));
ctx.emit(Inst::jmp_cond_symm(cc, taken, not_taken));
let src_ty = ctx.input_ty(branches[0], 0);
if is_int_ty(src_ty) || is_bool_ty(src_ty) {
let lhs = input_to_reg(
ctx,
InsnInput {
insn: branches[0],
input: 0,
},
);
let rhs = input_to_reg_mem_imm(
ctx,
InsnInput {
insn: branches[0],
input: 1,
},
);
let cc = CC::from_intcc(inst_condcode(ctx.data(branches[0])));
let byte_size = src_ty.bytes() as u8;
// Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
// us dst - src at the machine instruction level, so invert operands.
ctx.emit(Inst::cmp_rmi_r(byte_size, rhs, lhs));
ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
} else {
unimplemented = true;
unimplemented!("bricmp with non-int type {:?}", src_ty);
}
}
// TODO: Brif/icmp, Brff/icmp, jump tables
_ => {
unimplemented = true;
}
_ => unimplemented!("branch opcode"),
}
} else {
assert!(branches.len() == 1);
@ -348,23 +739,13 @@ impl LowerBackend for X64Backend {
// Must be an unconditional branch or trap.
let op = ctx.data(branches[0]).opcode();
match op {
Opcode::Jump => {
Opcode::Jump | Opcode::Fallthrough => {
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
}
Opcode::Fallthrough => {
ctx.emit(Inst::jmp_known(BranchTarget::Label(targets[0])));
}
Opcode::Trap => {
unimplemented = true;
}
_ => panic!("Unknown branch type!"),
}
}
if unimplemented {
unimplemented!("lower_branch_group(x64): can't handle: {:?}", branches);
}
Ok(())
}
}

View file

@ -40,7 +40,7 @@ impl X64Backend {
fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
// This performs lowering to VCode, register-allocates the code, computes
// block layout and finalizes branches. The result is ready for binary emission.
let abi = Box::new(abi::X64ABIBody::new(&func, flags));
let abi = Box::new(abi::X64ABIBody::new(&func, flags)?);
compile::compile::<Self>(&func, self, abi)
}
}

View file

@ -596,6 +596,100 @@ fn expand_minmax(
cfg.recompute_block(pos.func, done);
}
/// This legalization converts a minimum/maximum operation into a sequence that matches the
/// non-x86-friendly WebAssembly semantics of NaN handling. This logic is kept separate from
/// [expand_minmax] above (the scalar version) for code clarity.
fn expand_minmax_vector(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let ty = func.dfg.ctrl_typevar(inst);
debug_assert!(ty.is_vector());
let (x, y, x86_opcode, is_max) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmin,
args,
} => (args[0], args[1], ir::Opcode::X86Fmin, false),
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmax,
args,
} => (args[0], args[1], ir::Opcode::X86Fmax, true),
_ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
};
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// This sequence is complex due to how x86 handles NaNs and +0/-0. If x86 finds a NaN in
// either lane it returns the second operand; likewise, if both operands are in {+0.0, -0.0}
// it returns the second operand. To match the behavior of "return the minimum of the
// operands or a canonical NaN if either operand is NaN," we must compare in both
// directions.
let (forward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, x, y);
let forward = dfg.first_result(forward_inst);
let (backward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, y, x);
let backward = dfg.first_result(backward_inst);
let (value, mask) = if is_max {
// For maximum:
// Find any differences between the forward and backward `max` operation.
let difference = pos.ins().bxor(forward, backward);
// Merge in the differences.
let propagate_nans_and_plus_zero = pos.ins().bor(backward, difference);
let value = pos.ins().fsub(propagate_nans_and_plus_zero, difference);
// Discover which lanes have NaNs in them.
let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, difference, value);
(value, find_nan_lanes_mask)
} else {
// For minimum:
// If either lane is a NaN, we want to use these bits, not the second operand bits.
let propagate_nans = pos.ins().bor(backward, forward);
// Find which lanes contain a NaN with an unordered comparison, filling the mask with
// 1s.
let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, forward, propagate_nans);
let bitcast_find_nan_lanes_mask = pos.ins().raw_bitcast(ty, find_nan_lanes_mask);
// Then flood the value lane with all 1s if that lane is a NaN. This causes all NaNs
// along this code path to be quieted and negative: after the upcoming shift and and_not,
// all upper bits (sign, exponent, and payload MSB) will be 1s.
let tmp = pos.ins().bor(propagate_nans, bitcast_find_nan_lanes_mask);
(tmp, bitcast_find_nan_lanes_mask)
};
// During this lowering we will need to know how many bits to shift by and what type to
// convert to when using an integer shift. Recall that an IEEE754 number looks like:
// `[sign bit] [exponent bits] [significand bits]`
// A quiet NaN has all exponent bits set to 1 and the most significant bit of the
// significand set to 1; a signaling NaN has the same exponent but the MSB of the
// significand is set to 0. The payload of the NaN is the remaining significand bits, and
// WebAssembly assumes a canonical NaN is quiet and has 0s in its payload. To compute this
// canonical NaN, we create a mask for the top 10 bits on F32X4 (1 sign + 8 exp. + 1 MSB
// sig.) and the top 13 bits on F64X2 (1 sign + 11 exp. + 1 MSB sig.). This means that all
// NaNs produced with the mask will be negative (`-NaN`) which is allowed by the sign
// non-determinism in the spec: https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0
let (shift_by, ty_as_int) = match ty {
F32X4 => (10, I32X4),
F64X2 => (13, I64X2),
_ => unimplemented!("this legalization only understands 128-bit floating point types"),
};
// In order to clear the NaN payload for canonical NaNs, we shift right the NaN lanes (all
// 1s) leaving 0s in the top bits. Remember that non-NaN lanes are all 0s so this has
// little effect.
let mask_as_int = pos.ins().raw_bitcast(ty_as_int, mask);
let shift_mask = pos.ins().ushr_imm(mask_as_int, shift_by);
let shift_mask_as_float = pos.ins().raw_bitcast(ty, shift_mask);
// Finally, we replace the value with `value & ~shift_mask`. For non-NaN lanes, this is
// equivalent to `... & 1111...` but for NaN lanes this will only have 1s in the top bits,
// clearing the payload.
pos.func
.dfg
.replace(inst)
.band_not(value, shift_mask_as_float);
}
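// A minimal standalone check, illustrative only and not part of the vendored
// legalizer, that the shift amounts chosen above (10 for F32X4, 13 for F64X2)
// keep exactly the sign + exponent + significand-MSB bits, so an all-ones NaN
// lane becomes a canonical negative quiet NaN after the shift-and-band_not step.
#[test]
fn canonical_nan_masks_match_shift_amounts() {
    // F32: 1 sign + 8 exponent + 1 significand MSB = 10 bits kept.
    let f32_mask = !(u32::MAX >> 10);
    assert_eq!(f32_mask, 0xFFC0_0000);
    assert!(f32::from_bits(f32_mask).is_nan());
    // F64: 1 sign + 11 exponent + 1 significand MSB = 13 bits kept.
    let f64_mask = !(u64::MAX >> 13);
    assert_eq!(f64_mask, 0xFFF8_0000_0000_0000);
    assert!(f64::from_bits(f64_mask).is_nan());
}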
/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to
/// i64 with a pattern, the rest needs more code.
///
@ -964,6 +1058,61 @@ fn expand_fcvt_to_sint_sat(
cfg.recompute_block(pos.func, done_block);
}
/// This legalization converts a vector of 32-bit floating point lanes to signed integer lanes
/// using CVTTPS2DQ (see encoding of `x86_cvtt2si`). This logic is separate from [expand_fcvt_to_sint_sat]
/// above (the scalar version), only due to how the transform groups are set up; TODO if we change
/// the SIMD legalization groups, then this logic could be merged into [expand_fcvt_to_sint_sat]
/// (see https://github.com/bytecodealliance/wasmtime/issues/1745).
fn expand_fcvt_to_sint_sat_vector(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToSintSat,
arg,
} = pos.func.dfg[inst]
{
let controlling_type = pos.func.dfg.ctrl_typevar(inst);
if controlling_type == I32X4 {
debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);
// We must both quiet any NaNs--setting that lane to 0--and saturate any
// lanes that might overflow during conversion to the highest/lowest signed integer
// allowed in that lane.
// Saturate NaNs: `fcmp eq` will not match if a lane contains a NaN. We use ANDPS to
// avoid doing the comparison twice (we need the zeroed lanes to find differences).
let zeroed_nans = pos.ins().fcmp(FloatCC::Equal, arg, arg);
let zeroed_nans_bitcast = pos.ins().raw_bitcast(F32X4, zeroed_nans);
let zeroed_nans_copy = pos.ins().band(arg, zeroed_nans_bitcast);
// Find differences with the zeroed lanes (we will only use the MSB: 1 if positive or
// NaN, 0 otherwise).
let differences = pos.ins().bxor(zeroed_nans_bitcast, arg);
let differences_bitcast = pos.ins().raw_bitcast(I32X4, differences);
// Convert the numeric lanes. CVTTPS2DQ will mark overflows with 0x80000000 (MSB set).
let converted = pos.ins().x86_cvtt2si(I32X4, zeroed_nans_copy);
// Create a mask of all 1s only on positive overflow, 0s otherwise. This uses the MSB
// of `differences` (1 when positive or NaN) and the MSB of `converted` (1 on positive
// overflow).
let tmp = pos.ins().band(differences_bitcast, converted);
let mask = pos.ins().sshr_imm(tmp, 31);
// Apply the mask to create 0x7FFFFFFF for positive overflow. XOR of all 0s (all other
// cases) has no effect.
pos.func.dfg.replace(inst).bxor(converted, mask);
} else {
unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None))
}
}
}
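// Illustrative scalar model of the lane-wise fix-up above; it is not part of the
// vendored sources and only assumes the documented CVTTPS2DQ behaviour that NaN
// or out-of-range inputs produce 0x8000_0000. It mirrors the mask arithmetic so
// the positive-overflow correction is easier to follow.
fn fcvt_to_sint_sat_lane_model(x: f32) -> i32 {
    // `fcmp eq x, x` fails only for NaN, so the mask is all ones otherwise.
    let not_nan_mask: u32 = if x == x { u32::MAX } else { 0 };
    // Zero the lane before conversion when it holds a NaN.
    let quieted = f32::from_bits(x.to_bits() & not_nan_mask);
    // Model of the truncating conversion: overflow is marked with i32::MIN.
    let converted: i32 = if quieted >= 2_147_483_648.0 || quieted < -2_147_483_648.0 {
        i32::MIN
    } else {
        quieted as i32
    };
    // The MSB of `differences` is 1 exactly when a non-NaN input was positive.
    let differences = (not_nan_mask ^ x.to_bits()) as i32;
    // All ones only on positive overflow; XOR then turns i32::MIN into i32::MAX.
    let positive_overflow_mask = (differences & converted) >> 31;
    converted ^ positive_overflow_mask
}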
fn expand_fcvt_to_uint(
inst: ir::Inst,
func: &mut ir::Function,

View file

@ -66,19 +66,14 @@ fn dynamic_addr(
// Start with the bounds check. Trap if `offset + access_size > bound`.
let bound = pos.ins().global_value(offset_ty, bound_gv);
let oob;
if access_size == 1 {
let (cc, lhs, bound) = if access_size == 1 {
// `offset > bound - 1` is the same as `offset >= bound`.
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound);
(IntCC::UnsignedGreaterThanOrEqual, offset, bound)
} else if access_size <= min_size {
// We know that bound >= min_size, so here we can compare `offset > bound - access_size`
// without wrapping.
let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64));
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThan, offset, adj_bound);
(IntCC::UnsignedGreaterThan, offset, adj_bound)
} else {
// We need an overflow check for the adjusted offset.
let access_size_val = pos.ins().iconst(offset_ty, access_size as i64);
@ -88,13 +83,27 @@ fn dynamic_addr(
overflow,
ir::TrapCode::HeapOutOfBounds,
);
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThan, adj_offset, bound);
}
(IntCC::UnsignedGreaterThan, adj_offset, bound)
};
let oob = pos.ins().icmp(cc, lhs, bound);
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func);
let spectre_oob_comparison = if isa.flags().enable_heap_access_spectre_mitigation() {
Some((cc, lhs, bound))
} else {
None
};
compute_addr(
isa,
inst,
heap,
addr_ty,
offset,
offset_ty,
pos.func,
spectre_oob_comparison,
);
}
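// Plain-Rust sketch of the predicate materialized above, illustrative only and
// not part of the vendored sources. `offset`, `access_size` and `bound` play the
// same roles as in `dynamic_addr`, and `min_size` is the heap's declared minimum
// size (the code above relies on `bound >= min_size`); the three branches mirror
// the three (condition code, lhs, rhs) shapes chosen for the bounds check.
fn heap_access_is_oob(offset: u64, access_size: u64, bound: u64, min_size: u64) -> bool {
    if access_size == 1 {
        // `offset > bound - 1` is the same as `offset >= bound`.
        offset >= bound
    } else if access_size <= min_size {
        // bound >= min_size >= access_size, so `bound - access_size` cannot wrap.
        offset > bound - access_size
    } else {
        // General case: the adjusted offset may itself overflow, which is also
        // out of bounds (the real code traps on the overflow flag).
        match offset.checked_add(access_size) {
            Some(adj_offset) => adj_offset > bound,
            None => true,
        }
    }
}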
/// Expand a `heap_addr` for a static heap.
@ -146,20 +155,35 @@ fn static_addr(
// With that we have an optimization here where with 32-bit offsets and
// `bound - access_size >= 4GB` we can omit a bounds check.
let limit = bound - access_size;
let mut spectre_oob_comparison = None;
if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
let oob = if limit & 1 == 1 {
let (cc, lhs, limit_imm) = if limit & 1 == 1 {
// Prefer testing `offset >= limit - 1` when limit is odd because an even number is
// likely to be a convenient constant on ARM and other RISC architectures.
pos.ins()
.icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit as i64 - 1)
let limit = limit as i64 - 1;
(IntCC::UnsignedGreaterThanOrEqual, offset, limit)
} else {
pos.ins()
.icmp_imm(IntCC::UnsignedGreaterThan, offset, limit as i64)
let limit = limit as i64;
(IntCC::UnsignedGreaterThan, offset, limit)
};
let oob = pos.ins().icmp_imm(cc, lhs, limit_imm);
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
if isa.flags().enable_heap_access_spectre_mitigation() {
let limit = pos.ins().iconst(offset_ty, limit_imm);
spectre_oob_comparison = Some((cc, lhs, limit));
}
}
compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func);
compute_addr(
isa,
inst,
heap,
addr_ty,
offset,
offset_ty,
pos.func,
spectre_oob_comparison,
);
}
/// Emit code for the base address computation of a `heap_addr` instruction.
@ -171,6 +195,11 @@ fn compute_addr(
mut offset: ir::Value,
offset_ty: ir::Type,
func: &mut ir::Function,
// If we are performing Spectre mitigation with conditional selects, the
// values to compare and the condition code that indicates an out-of-bounds
// condition; on this condition, the conditional move will choose a
// speculatively safe address (a zero / null pointer) instead.
spectre_oob_comparison: Option<(IntCC, ir::Value, ir::Value)>,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
@ -198,5 +227,15 @@ fn compute_addr(
pos.ins().global_value(addr_ty, base_gv)
};
pos.func.dfg.replace(inst).iadd(base, offset);
if let Some((cc, a, b)) = spectre_oob_comparison {
let final_addr = pos.ins().iadd(base, offset);
let zero = pos.ins().iconst(addr_ty, 0);
let flags = pos.ins().ifcmp(a, b);
pos.func
.dfg
.replace(inst)
.selectif_spectre_guard(addr_ty, cc, flags, zero, final_addr);
} else {
pos.func.dfg.replace(inst).iadd(base, offset);
}
}
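// Sketch of the code shape this produces for `heap_addr` when the Spectre
// mitigation is enabled (CLIF-style pseudocode for illustration, not literal
// compiler output):
//
//   oob   = icmp_imm ugt offset, limit    ; or an icmp against a dynamic bound
//   trapnz oob, heap_oob
//   addr  = iadd base, offset
//   zero  = iconst 0
//   flags = ifcmp offset, limit
//   addr' = selectif_spectre_guard ugt, flags, zero, addr
//
// The trap still fires architecturally on the out-of-bounds path, but a
// misspeculated execution now sees a null address instead of an
// attacker-controlled one.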

View file

@ -12,13 +12,13 @@
//! from the branch itself.
//!
//! - The lowering of control flow from the CFG-with-edges produced by
//! [BlockLoweringOrder], combined with many empty edge blocks when the register
//! allocator does not need to insert any spills/reloads/moves in edge blocks,
//! results in many suboptimal branch patterns. The lowering also pays no
//! attention to block order, and so two-target conditional forms (cond-br
//! followed by uncond-br) can often be avoided because one of the targets is
//! the fallthrough. There are several cases here where we can simplify to use
//! fewer branches.
//! [BlockLoweringOrder](super::BlockLoweringOrder), combined with many empty
//! edge blocks when the register allocator does not need to insert any
//! spills/reloads/moves in edge blocks, results in many suboptimal branch
//! patterns. The lowering also pays no attention to block order, and so
//! two-target conditional forms (cond-br followed by uncond-br) can often be
//! avoided because one of the targets is the fallthrough. There are several
//! cases here where we can simplify to use fewer branches.
//!
//! This "buffer" implements a single-pass code emission strategy (with a later
//! "fixup" pass, but only through recorded fixups, not all instructions). The
@ -41,7 +41,7 @@
//! by the emitter (e.g., vcode iterating over instruction structs). The emitter
//! has some awareness of this: it either asks for an island between blocks, so
//! it is not accidentally executed, or else it emits a branch around the island
//! when all other options fail (see [Inst::EmitIsland] meta-instruction).
//! when all other options fail (see `Inst::EmitIsland` meta-instruction).
//!
//! - A "veneer" is an instruction (or sequence of instructions) in an "island"
//! that implements a longer-range reference to a label. The idea is that, for
@ -1024,7 +1024,7 @@ impl<I: VCodeInst> MachBuffer<I> {
let veneer_offset = self.cur_offset();
trace!("making a veneer at {}", veneer_offset);
let slice = &mut self.data[start..end];
// Patch the original label use to refer to teh veneer.
// Patch the original label use to refer to the veneer.
trace!(
"patching original at offset {} to veneer offset {}",
offset,

View file

@ -132,7 +132,11 @@ pub trait LowerCtx {
fn get_input(&self, ir_inst: Inst, idx: usize) -> LowerInput;
/// Get the `idx`th output register of the given IR instruction. When
/// `backend.lower_inst_to_regs(ctx, inst)` is called, it is expected that
/// the backend will write results to these output register(s).
/// the backend will write results to these output register(s). This
/// register will always be "fresh"; it is guaranteed not to overlap with
/// any of the inputs, and can be freely used as a scratch register within
/// the lowered instruction sequence, as long as its final value is the
/// result of the computation.
fn get_output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg>;
// Codegen primitives: allocate temps, emit instructions, set result registers,

View file

@ -137,13 +137,10 @@ impl<I: VCodeInst> VCodeBuilder<I> {
/// Set the type of a VReg.
pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
if self.vcode.vreg_types.len() <= vreg.get_index() {
self.vcode.vreg_types.resize(
self.vcode.vreg_types.len()
+ ((vreg.get_index() + 1) - self.vcode.vreg_types.len()),
ir::types::I8,
)
self.vcode
.vreg_types
.resize(vreg.get_index() + 1, ir::types::I8);
}
self.vcode.vreg_types[vreg.get_index()] = ty;
}

View file

@ -386,7 +386,11 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
}
let ok = pos.func.update_encoding(inst, isa).is_ok();
debug_assert!(ok);
debug_assert!(
ok,
"failed to update encoding for `{}`",
pos.func.dfg.display_inst(inst, isa)
);
}
//----------------------------------------------------------------------

View file

@ -635,7 +635,11 @@ impl RedundantReloadRemover {
// Load is completely redundant. Convert it to a no-op.
dfg.replace(inst).fill_nop(arg);
let ok = func.update_encoding(inst, isa).is_ok();
debug_assert!(ok, "fill_nop encoding missing for this type");
debug_assert!(
ok,
"fill_nop encoding missing for this type: `{}`",
func.dfg.display_inst(inst, isa)
);
}
Transform::ChangeToCopyToSSA(ty, reg) => {
// We already have the relevant value in some other register. Convert the

View file

@ -21,7 +21,7 @@ pub enum CodegenError {
/// Cranelift can compile very large and complicated functions, but the [implementation has
/// limits][limits] that cause compilation to fail when they are exceeded.
///
/// [limits]: https://github.com/bytecodealliance/wasmtime/blob/master/cranelift/docs/ir.md#implementation-limits
/// [limits]: https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/ir.md#implementation-limits
#[error("Implementation limit exceeded")]
ImplLimitExceeded,

View file

@ -399,6 +399,7 @@ emit_all_ones_funcaddrs = false
enable_probestack = true
probestack_func_adjusts_sp = false
enable_jump_tables = true
enable_heap_access_spectre_mitigation = true
"#
);
assert_eq!(f.opt_level(), super::OptLevel::None);

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"2633e2c61491f80fbeea54dcf8763ff7c4b91510da00c32fdba8425cf5267a74","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"d1d8477572f70cc28f71424af272d9eec0adf58af657ff153c4acbbb39822a50","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}
{"files":{"Cargo.toml":"2633e2c61491f80fbeea54dcf8763ff7c4b91510da00c32fdba8425cf5267a74","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"dea43e8044284df50f8b8772e9b48ba8b109b45c74111ff73619775d57ad8d67","src/frontend.rs":"ac3a1e3070b1ab0bdec84e4d73ec182b50d0b9a4017e6a95c37adab57571b827","src/lib.rs":"5197f467d1625ee2b117a168f4b1886b4b69d4250faea6618360a5adc70b4e0c","src/ssa.rs":"650d26025706cfb63935f956bca6f166b0edfa32260cd2a8c27f9b49fcc743c3","src/switch.rs":"3bf1f11817565b95edfbc9393ef2bfdeacf534264c9d44b4f93d1432b353af6c","src/variable.rs":"399437bd7d2ac11a7a748bad7dd1f6dac58824d374ec318f36367a9d077cc225"},"package":null}

View file

@ -206,6 +206,11 @@ impl<'a> FunctionBuilder<'a> {
}
}
/// Get the block that this builder is currently at.
pub fn current_block(&self) -> Option<Block> {
self.position.expand()
}
/// Set the source location that should be assigned to all new instructions.
pub fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
self.srcloc = srcloc;
@ -223,6 +228,11 @@ impl<'a> FunctionBuilder<'a> {
block
}
/// Insert `block` in the layout *after* the existing block `after`.
pub fn insert_block_after(&mut self, block: Block, after: Block) {
self.func.layout.insert_block_after(block, after);
}
/// After the call to this function, new instructions will be inserted into the designated
/// block, in the order they are declared. You must declare the types of the Block arguments
/// you will use here.

View file

@ -1 +1 @@
{"files":{"Cargo.toml":"4a020cf3914cc5e863a120f2bde92354a063ffe827a28422a5ecf86433cf8cbc","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"cce724251d4abc08c6492e1e25c138ab5a0d11e9ac90bc573652b18e034f56ed","src/code_translator.rs":"132fe4f0ee579339f249270907617a1925b628c306659cdaab58df907b6a83ce","src/environ/dummy.rs":"07b6510a7141b92769c914e37386790486f92b691beb0876b8590f2ae5489ee4","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"2ff8524cd592efdef67e5f8d06d144f7d628dee8183848ff4f5e35850f3ce550","src/func_translator.rs":"eb1fcea970407eda872984808e9a3e3a3297c2dea6e3a600ee7116ca89c7b49f","src/lib.rs":"6d3662b3f219a3f7a26f6b44b7921a19da1d892cf78f5a4434fdced5753b069f","src/module_translator.rs":"bcdf5a84226b726a73f4be0acb0318ca89c82584460101378e73021d85bd4485","src/sections_translator.rs":"db567511e273a9e383b18a15fc47f74a1247cbe13f120d7656c21660be53ab78","src/state/func_state.rs":"b114522784984a7cc26a3549c7c17f842885e1232254de81d938f9d155f95aa6","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"3cb3d9de26ec7ccc0ba81ed82163f27648794d4d1d1162eae8eee80a3c0ac05a","src/translation_utils.rs":"20082fded6a8d3637eccbda4465355d8d9fab0a1cd8222accb10cb3e06543689","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}
{"files":{"Cargo.toml":"6c9d8563161a9803e876842482a1c34fd0ea740d5a7141fc51cec3c21ef60eec","LICENSE":"268872b9816f90fd8e85db5a28d33f8150ebb8dd016653fb39ef1f94f2686bc5","README.md":"c82c252fbeeaa101a0eef042b9a925eb1fa3d2b51d19481b9c22e593e6a8d772","src/code_translator.rs":"e8d525ae48f967ebda012981b10dd11fbb46d9223fd95d1e3409da528851fcf7","src/environ/dummy.rs":"922d029491a9f5c55d22fcc9fbeae9e8c6721fa6556527785494f1351874e9f3","src/environ/mod.rs":"692f35d75f125f9c071f7166252f427e4bac29401356f73307c6c36e23c667fb","src/environ/spec.rs":"026a145c1cf9cd25c77e7ea8e0bb43739769dfc4693fcf827f6cdb79acf398a1","src/func_translator.rs":"b4391a11df5c401c9ddd26698105548b7a861c8deb5f84215f0b88cba5327362","src/lib.rs":"7bdbcf638fa30fb05e8320439881f7536824f7f60a7db4f0c1b51ab369edf895","src/module_translator.rs":"47b575f0edbe8a2a3334261742870ce7424e13d91f8980609f9c963a2811e1f6","src/sections_translator.rs":"ebd08548e048c7f792da45aa8d710e7d6f047e9197bc86260743c97d00dd99f6","src/state/func_state.rs":"023e3eb4f69590167baecb3fa8e7b335d69a631fff68fa0ee249075699f71a30","src/state/mod.rs":"20014cb93615467b4d20321b52f67f66040417efcaa739a4804093bb559eed19","src/state/module_state.rs":"3cb3d9de26ec7ccc0ba81ed82163f27648794d4d1d1162eae8eee80a3c0ac05a","src/translation_utils.rs":"0a2a53a7f60a5192661ce4c95ee9bd6775e1eb7d32647e1c6e026b0f8849cd2c","tests/wasm_testsuite.rs":"da8dedfd11918946e9cf6af68fd4826f020ef90a4e22742b1a30e61a3fb4aedd"},"package":null}

View file

@ -12,7 +12,7 @@ keywords = ["webassembly", "wasm"]
edition = "2018"
[dependencies]
wasmparser = { version = "0.57.0", default-features = false }
wasmparser = { version = "0.58.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.65.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.65.0" }
cranelift-frontend = { path = "../frontend", version = "0.65.0", default-features = false }

View file

@ -5,4 +5,4 @@ If you're looking for a complete WebAssembly implementation that uses this
library, see [Wasmtime].
[Wasmtime]: https://github.com/bytecodealliance/wasmtime
[Cranelift IR]: https://github.com/bytecodealliance/wasmtime/blob/master/cranelift/docs/ir.md
[Cranelift IR]: https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/ir.md

View file

@ -533,7 +533,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
// `index` is the index of the function's signature and `table_index` is the index of
// the table to search the function in.
let (sigref, num_args) = state.get_indirect_sig(builder.func, *index, environ)?;
let table = state.get_table(builder.func, *table_index, environ)?;
let table = state.get_or_create_table(builder.func, *table_index, environ)?;
let callee = state.pop1();
// Bitcast any vector arguments to their default type, I8X16, before calling.
@ -1039,15 +1039,14 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::F32Le | Operator::F64Le => {
translate_fcmp(FloatCC::LessThanOrEqual, builder, state)
}
Operator::RefNull { ty: _ } => state.push1(builder.ins().null(environ.reference_type())),
Operator::RefNull { ty } => state.push1(environ.translate_ref_null(builder.cursor(), *ty)?),
Operator::RefIsNull { ty: _ } => {
let arg = state.pop1();
let val = builder.ins().is_null(arg);
let val_int = builder.ins().bint(I32, val);
state.push1(val_int);
let value = state.pop1();
state.push1(environ.translate_ref_is_null(builder.cursor(), value)?);
}
Operator::RefFunc { function_index } => {
state.push1(environ.translate_ref_func(builder.cursor(), *function_index)?);
let index = FuncIndex::from_u32(*function_index);
state.push1(environ.translate_ref_func(builder.cursor(), index)?);
}
Operator::AtomicNotify { .. }
| Operator::I32AtomicWait { .. }
@ -1163,41 +1162,45 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
environ.translate_data_drop(builder.cursor(), *segment)?;
}
Operator::TableSize { table: index } => {
let table = state.get_table(builder.func, *index, environ)?;
let table = state.get_or_create_table(builder.func, *index, environ)?;
state.push1(environ.translate_table_size(
builder.cursor(),
TableIndex::from_u32(*index),
table,
)?);
}
Operator::TableGrow { table } => {
let table_index = TableIndex::from_u32(*table);
Operator::TableGrow { table: index } => {
let table_index = TableIndex::from_u32(*index);
let table = state.get_or_create_table(builder.func, *index, environ)?;
let delta = state.pop1();
let init_value = state.pop1();
state.push1(environ.translate_table_grow(
builder.cursor(),
table_index,
table,
delta,
init_value,
)?);
}
Operator::TableGet { table } => {
let table_index = TableIndex::from_u32(*table);
Operator::TableGet { table: index } => {
let table_index = TableIndex::from_u32(*index);
let table = state.get_or_create_table(builder.func, *index, environ)?;
let index = state.pop1();
state.push1(environ.translate_table_get(builder.cursor(), table_index, index)?);
state.push1(environ.translate_table_get(builder, table_index, table, index)?);
}
Operator::TableSet { table } => {
let table_index = TableIndex::from_u32(*table);
Operator::TableSet { table: index } => {
let table_index = TableIndex::from_u32(*index);
let table = state.get_or_create_table(builder.func, *index, environ)?;
let value = state.pop1();
let index = state.pop1();
environ.translate_table_set(builder.cursor(), table_index, value, index)?;
environ.translate_table_set(builder, table_index, table, value, index)?;
}
Operator::TableCopy {
dst_table: dst_table_index,
src_table: src_table_index,
} => {
let dst_table = state.get_table(builder.func, *dst_table_index, environ)?;
let src_table = state.get_table(builder.func, *src_table_index, environ)?;
let dst_table = state.get_or_create_table(builder.func, *dst_table_index, environ)?;
let src_table = state.get_or_create_table(builder.func, *src_table_index, environ)?;
let len = state.pop1();
let src = state.pop1();
let dest = state.pop1();
@ -1223,7 +1226,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
segment,
table: table_index,
} => {
let table = state.get_table(builder.func, *table_index, environ)?;
let table = state.get_or_create_table(builder.func, *table_index, environ)?;
let len = state.pop1();
let src = state.pop1();
let dest = state.pop1();
@ -1383,6 +1386,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().ineg(a))
}
Operator::I8x16Abs | Operator::I16x8Abs | Operator::I32x4Abs => {
let a = pop1_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().iabs(a))
}
Operator::I16x8Mul | Operator::I32x4Mul | Operator::I64x2Mul => {
let (a, b) = pop2_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().imul(a, b))
@ -1548,11 +1555,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().fcvt_from_uint(F32X4, a))
}
Operator::I32x4TruncSatF32x4S
| Operator::I32x4TruncSatF32x4U
| Operator::I8x16Abs
| Operator::I16x8Abs
| Operator::I32x4Abs
Operator::I32x4TruncSatF32x4S => {
let a = pop1_with_bitcast(state, F32X4, builder);
state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
}
Operator::I32x4TruncSatF32x4U
| Operator::I8x16NarrowI16x8S { .. }
| Operator::I8x16NarrowI16x8U { .. }
| Operator::I16x8NarrowI32x4S { .. }
@ -1564,7 +1571,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I32x4WidenLowI16x8S { .. }
| Operator::I32x4WidenHighI16x8S { .. }
| Operator::I32x4WidenLowI16x8U { .. }
| Operator::I32x4WidenHighI16x8U { .. } => {
| Operator::I32x4WidenHighI16x8U { .. }
| Operator::I8x16Bitmask
| Operator::I16x8Bitmask
| Operator::I32x4Bitmask => {
return Err(wasm_unsupported!("proposed SIMD operator {:?}", op));
}
@ -1981,6 +1991,7 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I8x16GeS
| Operator::I8x16GeU
| Operator::I8x16Neg
| Operator::I8x16Abs
| Operator::I8x16AnyTrue
| Operator::I8x16AllTrue
| Operator::I8x16Shl
@ -1996,7 +2007,8 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I8x16MinU
| Operator::I8x16MaxS
| Operator::I8x16MaxU
| Operator::I8x16RoundingAverageU => I8X16,
| Operator::I8x16RoundingAverageU
| Operator::I8x16Bitmask => I8X16,
Operator::I16x8Splat
| Operator::V16x8LoadSplat { .. }
@ -2014,6 +2026,7 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I16x8GeS
| Operator::I16x8GeU
| Operator::I16x8Neg
| Operator::I16x8Abs
| Operator::I16x8AnyTrue
| Operator::I16x8AllTrue
| Operator::I16x8Shl
@ -2030,7 +2043,8 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I16x8MaxS
| Operator::I16x8MaxU
| Operator::I16x8RoundingAverageU
| Operator::I16x8Mul => I16X8,
| Operator::I16x8Mul
| Operator::I16x8Bitmask => I16X8,
Operator::I32x4Splat
| Operator::V32x4LoadSplat { .. }
@ -2047,6 +2061,7 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I32x4GeS
| Operator::I32x4GeU
| Operator::I32x4Neg
| Operator::I32x4Abs
| Operator::I32x4AnyTrue
| Operator::I32x4AllTrue
| Operator::I32x4Shl
@ -2060,7 +2075,8 @@ fn type_of(operator: &Operator) -> Type {
| Operator::I32x4MaxS
| Operator::I32x4MaxU
| Operator::F32x4ConvertI32x4S
| Operator::F32x4ConvertI32x4U => I32X4,
| Operator::F32x4ConvertI32x4U
| Operator::I32x4Bitmask => I32X4,
Operator::I64x2Splat
| Operator::V64x2LoadSplat { .. }

View file

@ -22,6 +22,7 @@ use cranelift_codegen::ir::types::*;
use cranelift_codegen::ir::{self, InstBuilder};
use cranelift_codegen::isa::TargetFrontendConfig;
use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap};
use cranelift_frontend::FunctionBuilder;
use std::boxed::Box;
use std::string::String;
use std::vec::Vec;
@ -197,6 +198,14 @@ impl<'dummy_environment> DummyFuncEnvironment<'dummy_environment> {
));
sig
}
fn reference_type(&self) -> ir::Type {
match self.pointer_type() {
ir::types::I32 => ir::types::R32,
ir::types::I64 => ir::types::R64,
_ => panic!("unsupported pointer type"),
}
}
}
impl<'dummy_environment> TargetEnvironment for DummyFuncEnvironment<'dummy_environment> {
@ -435,6 +444,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
&mut self,
mut pos: FuncCursor,
_table_index: TableIndex,
_table: ir::Table,
_delta: ir::Value,
_init_value: ir::Value,
) -> WasmResult<ir::Value> {
@ -443,17 +453,19 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_table_get(
&mut self,
mut pos: FuncCursor,
builder: &mut FunctionBuilder,
_table_index: TableIndex,
_table: ir::Table,
_index: ir::Value,
) -> WasmResult<ir::Value> {
Ok(pos.ins().null(self.reference_type()))
Ok(builder.ins().null(self.reference_type()))
}
fn translate_table_set(
&mut self,
_pos: FuncCursor,
_builder: &mut FunctionBuilder,
_table_index: TableIndex,
_table: ir::Table,
_value: ir::Value,
_index: ir::Value,
) -> WasmResult<()> {
@ -505,7 +517,7 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
fn translate_ref_func(
&mut self,
mut pos: FuncCursor,
_func_index: u32,
_func_index: FuncIndex,
) -> WasmResult<ir::Value> {
Ok(pos.ins().null(self.reference_type()))
}

View file

@ -78,7 +78,7 @@ pub enum WasmError {
/// Cranelift can compile very large and complicated functions, but the [implementation has
/// limits][limits] that cause compilation to fail when they are exceeded.
///
/// [limits]: https://github.com/bytecodealliance/wasmtime/blob/master/cranelift/docs/ir.md#implementation-limits
/// [limits]: https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/ir.md#implementation-limits
#[error("Implementation limit exceeded")]
ImplLimitExceeded,
@ -133,10 +133,15 @@ pub trait TargetEnvironment {
self.target_config().pointer_bytes()
}
/// Get the Cranelift reference type to use for native references.
/// Get the Cranelift reference type to use for the given Wasm reference
/// type.
///
/// This returns `R64` for 64-bit architectures and `R32` for 32-bit architectures.
fn reference_type(&self) -> ir::Type {
/// By default, this returns `R64` for 64-bit architectures and `R32` for
/// 32-bit architectures. If you override this, then you should also
/// override `FuncEnvironment::{translate_ref_null, translate_ref_is_null}`
/// as well.
fn reference_type(&self, ty: WasmType) -> ir::Type {
let _ = ty;
match self.pointer_type() {
ir::types::I32 => ir::types::R32,
ir::types::I64 => ir::types::R64,
@ -355,6 +360,7 @@ pub trait FuncEnvironment: TargetEnvironment {
&mut self,
pos: FuncCursor,
table_index: TableIndex,
table: ir::Table,
delta: ir::Value,
init_value: ir::Value,
) -> WasmResult<ir::Value>;
@ -362,16 +368,18 @@ pub trait FuncEnvironment: TargetEnvironment {
/// Translate a `table.get` WebAssembly instruction.
fn translate_table_get(
&mut self,
pos: FuncCursor,
builder: &mut FunctionBuilder,
table_index: TableIndex,
table: ir::Table,
index: ir::Value,
) -> WasmResult<ir::Value>;
/// Translate a `table.set` WebAssembly instruction.
fn translate_table_set(
&mut self,
pos: FuncCursor,
builder: &mut FunctionBuilder,
table_index: TableIndex,
table: ir::Table,
value: ir::Value,
index: ir::Value,
) -> WasmResult<()>;
@ -416,8 +424,43 @@ pub trait FuncEnvironment: TargetEnvironment {
/// Translate a `elem.drop` WebAssembly instruction.
fn translate_elem_drop(&mut self, pos: FuncCursor, seg_index: u32) -> WasmResult<()>;
/// Translate a `ref.null T` WebAssembly instruction.
///
/// By default, translates into a null reference type.
///
/// Override this if you don't use Cranelift reference types for all Wasm
/// reference types (e.g. you use a raw pointer for `funcref`s) or if the
/// null sentinel is not a null reference type pointer for your type. If you
/// override this method, then you should also override
/// `translate_ref_is_null` as well.
fn translate_ref_null(&mut self, mut pos: FuncCursor, ty: WasmType) -> WasmResult<ir::Value> {
let _ = ty;
Ok(pos.ins().null(self.reference_type(ty)))
}
/// Translate a `ref.is_null` WebAssembly instruction.
///
/// By default, assumes that `value` is a Cranelift reference type, and that
/// a null Cranelift reference type is the null value for all Wasm reference
/// types.
///
/// If you override this method, you probably also want to override
/// `translate_ref_null` as well.
fn translate_ref_is_null(
&mut self,
mut pos: FuncCursor,
value: ir::Value,
) -> WasmResult<ir::Value> {
let is_null = pos.ins().is_null(value);
Ok(pos.ins().bint(ir::types::I32, is_null))
}
/// Translate a `ref.func` WebAssembly instruction.
fn translate_ref_func(&mut self, pos: FuncCursor, func_index: u32) -> WasmResult<ir::Value>;
fn translate_ref_func(
&mut self,
pos: FuncCursor,
func_index: FuncIndex,
) -> WasmResult<ir::Value>;
/// Translate a `global.get` WebAssembly instruction at `pos` for a global
/// that is custom.

View file

@ -196,8 +196,7 @@ fn declare_locals<FE: FuncEnvironment + ?Sized>(
let constant_handle = builder.func.dfg.constants.insert([0; 16].to_vec().into());
builder.ins().vconst(ir::types::I8X16, constant_handle)
}
ExternRef => builder.ins().null(environ.reference_type()),
FuncRef => builder.ins().null(environ.reference_type()),
ExternRef | FuncRef => environ.translate_ref_null(builder.cursor(), wasm_type)?,
ty => return Err(wasm_unsupported!("unsupported local type {:?}", ty)),
};

View file

@ -70,6 +70,7 @@ pub use crate::translation_utils::{
DefinedTableIndex, ElemIndex, FuncIndex, Global, GlobalIndex, GlobalInit, Memory, MemoryIndex,
SignatureIndex, Table, TableElementType, TableIndex,
};
pub use cranelift_frontend::FunctionBuilder;
/// Version number of this crate.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

View file

@ -71,6 +71,11 @@ pub fn translate_module<'data>(
environ.reserve_passive_data(count)?;
}
SectionContent::Module(_)
| SectionContent::ModuleCode(_)
| SectionContent::Instance(_)
| SectionContent::Alias(_) => unimplemented!("module linking not implemented yet"),
SectionContent::Custom {
name,
binary,

View file

@ -26,7 +26,7 @@ use wasmparser::{
ElementKind, ElementSectionReader, Export, ExportSectionReader, ExternalKind,
FunctionSectionReader, GlobalSectionReader, GlobalType, ImportSectionEntryType,
ImportSectionReader, MemorySectionReader, MemoryType, NameSectionReader, Naming, NamingReader,
Operator, TableSectionReader, Type, TypeSectionReader,
Operator, TableSectionReader, Type, TypeDef, TypeSectionReader,
};
/// Parses the Type section of the wasm module.
@ -40,22 +40,26 @@ pub fn parse_type_section(
environ.reserve_signatures(count)?;
for entry in types {
let wasm_func_ty = entry?;
let mut sig = Signature::new(ModuleEnvironment::target_config(environ).default_call_conv);
sig.params.extend(wasm_func_ty.params.iter().map(|ty| {
let cret_arg: ir::Type = type_to_type(*ty, environ)
.expect("only numeric types are supported in function signatures");
AbiParam::new(cret_arg)
}));
sig.returns.extend(wasm_func_ty.returns.iter().map(|ty| {
let cret_arg: ir::Type = type_to_type(*ty, environ)
.expect("only numeric types are supported in function signatures");
AbiParam::new(cret_arg)
}));
environ.declare_signature(&wasm_func_ty, sig)?;
module_translation_state
.wasm_types
.push((wasm_func_ty.params, wasm_func_ty.returns));
if let Ok(TypeDef::Func(wasm_func_ty)) = entry {
let mut sig =
Signature::new(ModuleEnvironment::target_config(environ).default_call_conv);
sig.params.extend(wasm_func_ty.params.iter().map(|ty| {
let cret_arg: ir::Type = type_to_type(*ty, environ)
.expect("only numeric types are supported in function signatures");
AbiParam::new(cret_arg)
}));
sig.returns.extend(wasm_func_ty.returns.iter().map(|ty| {
let cret_arg: ir::Type = type_to_type(*ty, environ)
.expect("only numeric types are supported in function signatures");
AbiParam::new(cret_arg)
}));
environ.declare_signature(&wasm_func_ty, sig)?;
module_translation_state
.wasm_types
.push((wasm_func_ty.params, wasm_func_ty.returns));
} else {
unimplemented!("module linking not implemented yet")
}
}
Ok(())
}
@ -70,7 +74,7 @@ pub fn parse_import_section<'data>(
for entry in imports {
let import = entry?;
let module_name = import.module;
let field_name = import.field;
let field_name = import.field.unwrap(); // TODO Handle error when module linking is implemented.
match import.ty {
ImportSectionEntryType::Function(sig) => {
@ -80,6 +84,9 @@ pub fn parse_import_section<'data>(
field_name,
)?;
}
ImportSectionEntryType::Module(_sig) | ImportSectionEntryType::Instance(_sig) => {
unimplemented!("module linking not implemented yet")
}
ImportSectionEntryType::Memory(MemoryType {
limits: ref memlimits,
shared,
@ -97,6 +104,7 @@ pub fn parse_import_section<'data>(
ImportSectionEntryType::Global(ref ty) => {
environ.declare_global_import(
Global {
wasm_ty: ty.content_type,
ty: type_to_type(ty.content_type, environ).unwrap(),
mutability: ty.mutable,
initializer: GlobalInit::Import,
@ -108,6 +116,7 @@ pub fn parse_import_section<'data>(
ImportSectionEntryType::Table(ref tab) => {
environ.declare_table_import(
Table {
wasm_ty: tab.element_type,
ty: match tabletype_to_type(tab.element_type, environ)? {
Some(t) => TableElementType::Val(t),
None => TableElementType::Func,
@ -157,6 +166,7 @@ pub fn parse_table_section(
for entry in tables {
let table = entry?;
environ.declare_table(Table {
wasm_ty: table.element_type,
ty: match tabletype_to_type(table.element_type, environ)? {
Some(t) => TableElementType::Val(t),
None => TableElementType::Func,
@ -227,6 +237,7 @@ pub fn parse_global_section(
}
};
let global = Global {
wasm_ty: content_type,
ty: type_to_type(content_type, environ).unwrap(),
mutability: mutable,
initializer,
@ -264,6 +275,9 @@ pub fn parse_export_section<'data>(
ExternalKind::Global => {
environ.declare_global_export(GlobalIndex::new(index), field)?
}
ExternalKind::Type | ExternalKind::Module | ExternalKind::Instance => {
unimplemented!("module linking not implemented yet")
}
}
}
@ -335,7 +349,9 @@ pub fn parse_element_section<'data>(
let index = ElemIndex::from_u32(index as u32);
environ.declare_passive_element(index, segments)?;
}
ElementKind::Declared => return Err(wasm_unsupported!("element kind declared")),
ElementKind::Declared => {
// Nothing to do here.
}
}
}
Ok(())

View file

@ -202,7 +202,7 @@ pub struct FuncTranslationState {
heaps: HashMap<MemoryIndex, ir::Heap>,
// Map of tables that have been created by `FuncEnvironment::make_table`.
tables: HashMap<TableIndex, ir::Table>,
pub(crate) tables: HashMap<TableIndex, ir::Table>,
// Map of indirect call signatures that have been created by
// `FuncEnvironment::make_indirect_sig()`.
@ -446,7 +446,7 @@ impl FuncTranslationState {
/// Get the `Table` reference that should be used to access table `index`.
/// Create the reference if necessary.
pub(crate) fn get_table<FE: FuncEnvironment + ?Sized>(
pub(crate) fn get_or_create_table<FE: FuncEnvironment + ?Sized>(
&mut self,
func: &mut ir::Function,
index: u32,

View file

@ -1,5 +1,5 @@
//! Helper functions and structures for the translation.
use crate::environ::{TargetEnvironment, WasmResult};
use crate::environ::{TargetEnvironment, WasmResult, WasmType};
use crate::state::ModuleTranslationState;
use crate::wasm_unsupported;
use core::u32;
@ -67,10 +67,18 @@ entity_impl!(DataIndex);
pub struct ElemIndex(u32);
entity_impl!(ElemIndex);
/// WebAssembly global.
/// A WebAssembly global.
///
/// Note that we record both the original Wasm type and the Cranelift IR type
/// used to represent it. This is because multiple different kinds of Wasm types
/// might be represented with the same Cranelift IR type. For example, both a
/// Wasm `i64` and a `funcref` might be represented with a Cranelift `i64` on
/// 64-bit architectures, and when GC is not required for func refs.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
pub struct Global {
/// The type of the value stored in the global.
/// The Wasm type of the value stored in the global.
pub wasm_ty: crate::WasmType,
/// The Cranelift IR type of the value stored in the global.
pub ty: ir::Type,
/// A flag indicating whether the value may change at runtime.
pub mutability: bool,
@ -104,7 +112,9 @@ pub enum GlobalInit {
/// WebAssembly table.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
pub struct Table {
/// The type of data stored in elements of the table.
/// The table elements' Wasm type.
pub wasm_ty: WasmType,
/// The table elements' Cranelift type.
pub ty: TableElementType,
/// The minimum number of elements in the table.
pub minimum: u32,
@ -143,7 +153,7 @@ pub fn type_to_type<PE: TargetEnvironment + ?Sized>(
wasmparser::Type::F32 => Ok(ir::types::F32),
wasmparser::Type::F64 => Ok(ir::types::F64),
wasmparser::Type::V128 => Ok(ir::types::I8X16),
wasmparser::Type::ExternRef | wasmparser::Type::FuncRef => Ok(environ.reference_type()),
wasmparser::Type::ExternRef | wasmparser::Type::FuncRef => Ok(environ.reference_type(ty)),
ty => Err(wasm_unsupported!("type_to_type: wasm type {:?}", ty)),
}
}
@ -160,7 +170,7 @@ pub fn tabletype_to_type<PE: TargetEnvironment + ?Sized>(
wasmparser::Type::F32 => Ok(Some(ir::types::F32)),
wasmparser::Type::F64 => Ok(Some(ir::types::F64)),
wasmparser::Type::V128 => Ok(Some(ir::types::I8X16)),
wasmparser::Type::ExternRef => Ok(Some(environ.reference_type())),
wasmparser::Type::ExternRef => Ok(Some(environ.reference_type(ty))),
wasmparser::Type::FuncRef => Ok(None),
ty => Err(wasm_unsupported!(
"tabletype_to_type: table wasm type {:?}",
@ -216,7 +226,7 @@ pub fn block_with_params<PE: TargetEnvironment + ?Sized>(
builder.append_block_param(block, ir::types::F64);
}
wasmparser::Type::ExternRef | wasmparser::Type::FuncRef => {
builder.append_block_param(block, environ.reference_type());
builder.append_block_param(block, environ.reference_type(*ty));
}
wasmparser::Type::V128 => {
builder.append_block_param(block, ir::types::I8X16);
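Threading the Wasm type into `reference_type` lets an environment choose different IR representations for `externref` and `funcref`, matching the new `Global`/`Table` doc comments above. A hedged standalone sketch of that decision; the enum names and the `R64`/`I64` choice are illustrative assumptions, not the Cranelift API:

// Illustrative Wasm-level and IR-level types; not the vendored definitions.
#[derive(Debug, Clone, Copy, PartialEq)]
enum WasmRefType {
    ExternRef,
    FuncRef,
}

#[derive(Debug, Clone, Copy, PartialEq)]
enum IrType {
    R64, // a GC-visible reference type
    I64, // a plain 64-bit integer
}

// Sketch of an environment picking the IR type per reference kind on a
// 64-bit target: an externref stays a real reference, while a funcref that
// never needs GC can be a pointer-sized integer.
fn reference_type(ty: WasmRefType, funcrefs_need_gc: bool) -> IrType {
    match ty {
        WasmRefType::ExternRef => IrType::R64,
        WasmRefType::FuncRef if funcrefs_need_gc => IrType::R64,
        WasmRefType::FuncRef => IrType::I64,
    }
}

fn main() {
    assert_eq!(reference_type(WasmRefType::ExternRef, false), IrType::R64);
    assert_eq!(reference_type(WasmRefType::FuncRef, false), IrType::I64);
    assert_eq!(reference_type(WasmRefType::FuncRef, true), IrType::R64);
}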

View file

@ -1 +1 @@
{"files":{"Cargo.lock":"f0adbcf79ba9aa2b13594b7b7e712884b8261151380659501abc2c2f7d5e9be0","Cargo.toml":"a5fbf3dde37713f296bb466ed72421400adef4947a8a61aa0cb9c52e5f0e51c7","README.md":"2e252886759b5ee5137ec39efc0765850be2cb4242c68e6b44452b75d3191db3","benches/benchmark.rs":"5ed64bf1497769a8df2a6518d06c7ca8dfaef328869ffa4fef43414035a07255","compare-master.sh":"165490eab36ef4eceb2913a6c5cdeff479a05e1e0119a7f4551b03dbcda51ad4","examples/dump.rs":"40cf9492d58e196e462c37d610fd372a648df780143d1804ae4c536f03ac5254","examples/simple.rs":"d2d46f1a232e9b23fd56982a84379b741423916983e0fd1f2a1009d456f7f851","src/binary_reader.rs":"83b0d2c7eb3fa23e0e3ea474586a9437678eb89b18682cd9a8824e678b75fc45","src/lib.rs":"8bb5301d5d66746160466e38b0e956e14e4997bf2385431ec9070cea13fc632e","src/limits.rs":"34e5cda95fb67669011ba95ca60f48fc777f3e3fa279ff68a1f2a072032a4abd","src/module_resources.rs":"f2f3e2a00ca74df1f6218ecf0c3d9ed36529675febba3e8a825585514658480f","src/operators_validator.rs":"e0085694bbde6d6c9efa853cc6d81ba86be39a7f41bf0ccff38ddd1dbe0898ff","src/parser.rs":"4b18ae92d0c6f85581bf77896f457b34f47c75cbbf422cdd4edac48c5f48c0cb","src/primitives.rs":"3017e05e7379c98e99de7e0618a8dbd59f37dc3f7e6da5e42bf3c4ca56ac7457","src/readers/code_section.rs":"bfdd8d5f08ef357679d7bfe6f9735ff4f08925361e0771a6b1b5112a12c62f30","src/readers/data_count_section.rs":"e711720f8205a906794dc7020a656a2ae74e1d9c3823fcdcdbd9d2f3b206c7d7","src/readers/data_section.rs":"f572e7d2589f0bccf5e97d43c1ca3aac103cbd47d139ead6b84b39b5c9d47c0b","src/readers/element_section.rs":"0193c9b7be80a0c18cba9f2d2892dba83819339aaf39a44d44003fec5328196c","src/readers/export_section.rs":"7c74f7a11406a95c162f6ad4f77aafd0b1eee309f33b69f06bea12b23925e143","src/readers/function_section.rs":"57c0479ba8d7f61908ed74e86cbc26553fdd6d2d952f032ce29385a39f82efd3","src/readers/global_section.rs":"5fa18bed0fffadcc2dbdcbaedbe4e4398992fd1ce9e611b0319333a7681082ac","src/readers/import_section.rs":"1db4bf7290d04783d5cf526050d025b15a1daaf2bd97fca1a92ecb873d48f641","src/readers/init_expr.rs":"7020c80013dad4518a5f969c3ab4d624b46d778f03e632871cf343964f63441c","src/readers/linking_section.rs":"9df71f3ee5356f0d273c099212213353080001e261ca697caddf6b847fb5af09","src/readers/memory_section.rs":"83212f86cfc40d18fb392e9234c880afdf443f4af38a727ba346f9c740ef8718","src/readers/mod.rs":"b9f835365b9b04411d7b141a3c9b52695e9bf8ef1f07094a10a18b901d0ac420","src/readers/module.rs":"db292e3cebe55e5f2e9de8aff0a2074fa874d42058c6bc2a798c5b7e3c1ca81e","src/readers/name_section.rs":"4ff460766bbcd67f658086c8fa525cf2bbceea67b393c65edfddbb714de722fd","src/readers/operators.rs":"1defc15f364775018ffe8c7f010ff83342c46659f780be4ba88c58fad7606e03","src/readers/producers_section.rs":"674f402fc4545c94487f827153871b37adab44ed5eff4070a436eb18e514023a","src/readers/reloc_section.rs":"0ef818a8b83a4542c4c29c23642436a92d3e7c37bc0248e817ed5a9d65ec38ce","src/readers/section_reader.rs":"f27f017938bb8602954298d053cd3b79d8876f9fcbbe0e1a3380051b6aa4584a","src/readers/sourcemappingurl_section.rs":"eff317f6f2b728a98a5eb68eec7e6cf222d27158d0d5597fd1c84f09b1092a50","src/readers/start_section.rs":"012fe574a5b94ea34c9d689629fb0df2f5ba4c11c835147b39155f5a8c715e34","src/readers/table_section.rs":"e564876825a7b31df2b5dc850279b523e26dc50a08da935cc8d635a49e809951","src/readers/type_section.rs":"2fa33a7b793f3bfa01c259b5dbc38633b7343931886ab41f0cb96dd78db3bf6e","src/tests.rs":"5d47ec97d0a303d8cbe905f8ddcf11212a03607e0b245c9f52371464e7d08ee7","src/validator.rs":"b14b45f03e99789d3b08d9c60a5d250904af776fa68fb0a39d400f5fa659ffc9"},"package":"32fd
dd575d477c6e9702484139cf9f23dcd554b06d185ed0f56c857dd3a47aa6"}
{"files":{"Cargo.lock":"66295ad9f17449e9ef5c16b64c9f0fca138ff07e31fb182bdd134099a7d049b4","Cargo.toml":"ddff8c2657f4fd0f83ce3b732cea03b8eb1f434fdce886fba2904cee5b0090d5","README.md":"2e252886759b5ee5137ec39efc0765850be2cb4242c68e6b44452b75d3191db3","benches/benchmark.rs":"a50793192bdc1729a786bb456e5ad1e567c7f4b6a0a13ab0e46754e965978e8f","compare-with-main.sh":"2ddfab71ba571055292a29a36c1ede05f64ba094c78495b945d1486bf32ab6d7","examples/dump.rs":"a5944669754d1093c048a3b2e959c8c22e485a8069582d532172a8207e54dce6","examples/simple.rs":"0bbf762ca214815d81a915106efca05a9fa642a7a250c704c188258ec15d2629","src/binary_reader.rs":"d209e8cf15db30cb06e4c23980de775a59db8654aeb7a69bbe432c09f5046f76","src/lib.rs":"62c4b60aae7b7c5018caf68da31f929956f188042fa0715781e6879148f79db1","src/limits.rs":"22649a707c3f894d0381d745f744876a106cacb72d8a9a608cfa7a6f3f1e5631","src/module_resources.rs":"3a2137adb9018a5d5ebcaf274f969e650e184f77e5db62cd9b655cc6e97fdee1","src/operators_validator.rs":"4d98039d738be26670f7fb399e0738dde6caa170c09139badb62190795c78593","src/parser.rs":"061ba728cbf044456c088255c4c633d5bcc630fe9035a21168f068e498e8255c","src/primitives.rs":"c5056a6f6f444cdd4b45d2b7bb332b776088d7b5bc323e3daddeb48110025b25","src/readers/alias_section.rs":"fa64491870d97577bad7f1891aab1be7597a940bc0e2ccfef0c84847e9df3d6d","src/readers/code_section.rs":"bfdd8d5f08ef357679d7bfe6f9735ff4f08925361e0771a6b1b5112a12c62f30","src/readers/data_count_section.rs":"e711720f8205a906794dc7020a656a2ae74e1d9c3823fcdcdbd9d2f3b206c7d7","src/readers/data_section.rs":"f572e7d2589f0bccf5e97d43c1ca3aac103cbd47d139ead6b84b39b5c9d47c0b","src/readers/element_section.rs":"0193c9b7be80a0c18cba9f2d2892dba83819339aaf39a44d44003fec5328196c","src/readers/export_section.rs":"7c74f7a11406a95c162f6ad4f77aafd0b1eee309f33b69f06bea12b23925e143","src/readers/function_section.rs":"57c0479ba8d7f61908ed74e86cbc26553fdd6d2d952f032ce29385a39f82efd3","src/readers/global_section.rs":"5fa18bed0fffadcc2dbdcbaedbe4e4398992fd1ce9e611b0319333a7681082ac","src/readers/import_section.rs":"236e754867ad7829b5a95221051daff3c5df971aff9f2339fa11256f2309d209","src/readers/init_expr.rs":"7020c80013dad4518a5f969c3ab4d624b46d778f03e632871cf343964f63441c","src/readers/instance_section.rs":"7b78bbca4b79ac7f9c42455815863a4d32dc074c5973ab1035dbfdf88b0d3c12","src/readers/linking_section.rs":"9df71f3ee5356f0d273c099212213353080001e261ca697caddf6b847fb5af09","src/readers/memory_section.rs":"83212f86cfc40d18fb392e9234c880afdf443f4af38a727ba346f9c740ef8718","src/readers/mod.rs":"d80ba76d763b06ae6e570f09a312b20006f0b83b5cd01d6baab496006fe9b7f1","src/readers/module.rs":"04f6e0bb7250f86037f30754d95a2941623272002184f372ed159db19f52dc7e","src/readers/module_code_section.rs":"aa9cf64f65e43ea5fcf9e695b7b1ba5a45b92d537f7ccef379a07162108ce9d9","src/readers/module_section.rs":"7a0c0b34478ec32030c5400df2366914c7e08ba799440a8c5ea999755c489e7f","src/readers/name_section.rs":"23b5106b17744833fb8cd61cb102e756bccb4a44594d34a5dd8b7930307ac4cb","src/readers/operators.rs":"1defc15f364775018ffe8c7f010ff83342c46659f780be4ba88c58fad7606e03","src/readers/producers_section.rs":"674f402fc4545c94487f827153871b37adab44ed5eff4070a436eb18e514023a","src/readers/reloc_section.rs":"0ef818a8b83a4542c4c29c23642436a92d3e7c37bc0248e817ed5a9d65ec38ce","src/readers/section_reader.rs":"f27f017938bb8602954298d053cd3b79d8876f9fcbbe0e1a3380051b6aa4584a","src/readers/sourcemappingurl_section.rs":"eff317f6f2b728a98a5eb68eec7e6cf222d27158d0d5597fd1c84f09b1092a50","src/readers/start_section.rs":"012fe574a5b94ea34c9d689629fb0df2f5ba4c11
c835147b39155f5a8c715e34","src/readers/table_section.rs":"e564876825a7b31df2b5dc850279b523e26dc50a08da935cc8d635a49e809951","src/readers/type_section.rs":"c2f9d7b77a1315d323bebe94ced44dc10b77c0e75c1e367bb594a402c74933ba","src/tests.rs":"5d47ec97d0a303d8cbe905f8ddcf11212a03607e0b245c9f52371464e7d08ee7","src/validator.rs":"bec65fde1d8b98d80d082067a6ccf006f35e3f06062cac97887bd5a04ef75192"},"package":"721a8d79483738d7aef6397edcf8f04cd862640b1ad5973adf5bb50fc10e86db"}

View file

@ -43,9 +43,9 @@ dependencies = [
[[package]]
name = "bumpalo"
version = "3.3.0"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5356f1d23ee24a1f785a56d1d1a5f0fd5b0f6a0c0fb2412ce11da71649ab78f6"
checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820"
[[package]]
name = "byteorder"
@ -81,9 +81,9 @@ dependencies = [
[[package]]
name = "criterion"
version = "0.3.2"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63f696897c88b57f4ffe3c69d8e1a0613c7d0e6c4833363c8560fbde9c47b966"
checksum = "70daa7ceec6cf143990669a04c7df13391d55fb27bd4079d252fca774ba244d8"
dependencies = [
"atty",
"cast",
@ -98,6 +98,7 @@ dependencies = [
"rayon",
"regex",
"serde",
"serde_cbor",
"serde_derive",
"serde_json",
"tinytemplate",
@ -106,9 +107,9 @@ dependencies = [
[[package]]
name = "criterion-plot"
version = "0.4.2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddeaf7989f00f2e1d871a26a110f3ed713632feac17f65f03ca938c542618b60"
checksum = "e022feadec601fba1649cfa83586381a4ad31c6bf3a9ab7d408118b05dd9889d"
dependencies = [
"cast",
"itertools",
@ -142,12 +143,13 @@ dependencies = [
[[package]]
name = "crossbeam-queue"
version = "0.2.1"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db"
checksum = "774ba60a54c213d409d5353bda12d49cd68d14e45036a285234c8d6f91f92570"
dependencies = [
"cfg-if",
"crossbeam-utils",
"maybe-uninit",
]
[[package]]
@ -199,10 +201,16 @@ dependencies = [
]
[[package]]
name = "hermit-abi"
version = "0.1.13"
name = "half"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71"
checksum = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177"
[[package]]
name = "hermit-abi"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9586eedd4ce6b3c498bc3b4dd92fc9f11166aa908a914071953768066c67909"
dependencies = [
"libc",
]
@ -218,15 +226,15 @@ dependencies = [
[[package]]
name = "itoa"
version = "0.4.5"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e"
checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
[[package]]
name = "js-sys"
version = "0.3.39"
version = "0.3.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa5a448de267e7358beaf4a5d849518fe9a0c13fce7afd44b06e68550e5562a7"
checksum = "c4b9172132a62451e56142bff9afc91c8e4a4500aa5b847da36815b63bfda916"
dependencies = [
"wasm-bindgen",
]
@ -239,9 +247,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.70"
version = "0.2.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3baa92041a6fec78c687fa0cc2b3fae8884f743d672cf551bed1d6dac6988d0f"
checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
[[package]]
name = "log"
@ -275,9 +283,9 @@ dependencies = [
[[package]]
name = "num-traits"
version = "0.2.11"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096"
checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611"
dependencies = [
"autocfg",
]
@ -294,15 +302,15 @@ dependencies = [
[[package]]
name = "oorandom"
version = "11.1.1"
version = "11.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af325bc33c7f60191be4e2c984d48aaa21e2854f473b85398344b60c9b6358"
checksum = "a170cebd8021a008ea92e4db85a72f80b35df514ec664b296fdcbb654eac0b2c"
[[package]]
name = "plotters"
version = "0.2.14"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9b1d9ca091d370ea3a78d5619145d1b59426ab0c9eedbad2514a4cee08bf389"
checksum = "0d1685fbe7beba33de0330629da9d955ac75bd54f33d7b79f9a895590124f6bb"
dependencies = [
"js-sys",
"num-traits",
@ -312,28 +320,29 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.15"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70a50b9351bfa8d65a7d93ce712dc63d2fd15ddbf2c36990fc7cac344859c04f"
checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.6"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea"
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rayon"
version = "1.3.0"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098"
checksum = "62f02856753d04e03e26929f820d0a0a337ebe71f849801eea335d464b349080"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
@ -341,9 +350,9 @@ dependencies = [
[[package]]
name = "rayon-core"
version = "1.7.0"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9"
checksum = "e92e15d89083484e11353891f1af602cc661426deb9564c298b270c726973280"
dependencies = [
"crossbeam-deque",
"crossbeam-queue",
@ -354,9 +363,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.3.7"
version = "1.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6020f034922e3194c711b82a627453881bc4682166cabb07134a10c26ba7692"
checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
dependencies = [
"regex-syntax",
]
@ -372,9 +381,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.6.17"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
[[package]]
name = "rustc_version"
@ -387,9 +396,9 @@ dependencies = [
[[package]]
name = "ryu"
version = "1.0.4"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "same-file"
@ -423,15 +432,25 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.110"
version = "1.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c"
checksum = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3"
[[package]]
name = "serde_cbor"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622"
dependencies = [
"half",
"serde",
]
[[package]]
name = "serde_derive"
version = "1.0.110"
version = "1.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984"
checksum = "2a0be94b04690fbaed37cddffc5c134bf537c8e3329d53e982fe04c374978f8e"
dependencies = [
"proc-macro2",
"quote",
@ -440,9 +459,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.53"
version = "1.0.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2"
checksum = "3433e879a558dde8b5e8feb2a04899cf34fdde1fafb894687e52105fc1162ac3"
dependencies = [
"itoa",
"ryu",
@ -451,9 +470,9 @@ dependencies = [
[[package]]
name = "syn"
version = "1.0.23"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95b5f192649e48a5302a13f2feb224df883b98933222369e4b3b0fe2a5447269"
checksum = "e8d5d96e8cbb005d6959f119f773bfaebb5684296108fb32600c00cde305b2cd"
dependencies = [
"proc-macro2",
"quote",
@ -471,9 +490,9 @@ dependencies = [
[[package]]
name = "tinytemplate"
version = "1.0.4"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45e4bc5ac99433e0dcb8b9f309dd271a165ae37dde129b9e0ce1bfdd8bfe4891"
checksum = "6d3dc76004a03cec1c5932bca4cdc2e39aaa798e3f82363dd94f9adf6098c12f"
dependencies = [
"serde",
"serde_json",
@ -481,15 +500,15 @@ dependencies = [
[[package]]
name = "unicode-width"
version = "0.1.7"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.2.0"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
[[package]]
name = "walkdir"
@ -504,9 +523,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen"
version = "0.2.62"
version = "0.2.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c7d40d09cdbf0f4895ae58cf57d92e1e57a9dd8ed2e8390514b54a47cc5551"
checksum = "6a634620115e4a229108b71bde263bb4220c483b3f07f5ba514ee8d15064c4c2"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
@ -514,9 +533,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.62"
version = "0.2.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3972e137ebf830900db522d6c8fd74d1900dcfc733462e9a12e942b00b4ac94"
checksum = "3e53963b583d18a5aa3aaae4b4c1cb535218246131ba22a71f05b518098571df"
dependencies = [
"bumpalo",
"lazy_static",
@ -529,9 +548,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.62"
version = "0.2.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2cd85aa2c579e8892442954685f0d801f9129de24fa2136b2c6a539c76b65776"
checksum = "3fcfd5ef6eec85623b4c6e844293d4516470d8f19cd72d0d12246017eb9060b8"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -539,9 +558,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.62"
version = "0.2.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eb197bd3a47553334907ffd2f16507b4f4f01bbec3ac921a7719e0decdfe72a"
checksum = "9adff9ee0e94b926ca81b57f57f86d5545cdcb1d259e21ec9bdd95b901754c75"
dependencies = [
"proc-macro2",
"quote",
@ -552,13 +571,13 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.62"
version = "0.2.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a91c2916119c17a8e316507afaaa2dd94b47646048014bbdf6bef098c1bb58ad"
checksum = "7f7b90ea6c632dd06fd765d44542e234d5e63d9bb917ecd64d79778a13bd79ae"
[[package]]
name = "wasmparser"
version = "0.57.0"
version = "0.58.0"
dependencies = [
"anyhow",
"criterion",
@ -568,9 +587,9 @@ dependencies = [
[[package]]
name = "web-sys"
version = "0.3.39"
version = "0.3.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bc359e5dd3b46cb9687a051d50a2fdd228e4ba7cf6fcf861a5365c3d671a642"
checksum = "863539788676619aac1a23e2df3655e96b32b0e05eb72ca34ba045ad573c625d"
dependencies = [
"js-sys",
"wasm-bindgen",
@ -578,9 +597,9 @@ dependencies = [
[[package]]
name = "winapi"
version = "0.3.8"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",

View file

@ -13,13 +13,17 @@
[package]
edition = "2018"
name = "wasmparser"
version = "0.57.0"
version = "0.58.0"
authors = ["Yury Delendik <ydelendik@mozilla.com>"]
description = "A simple event-driven library for parsing WebAssembly binary files.\n"
homepage = "https://github.com/bytecodealliance/wasm-tools/tree/master/crates/wasmparser"
homepage = "https://github.com/bytecodealliance/wasm-tools/tree/main/crates/wasmparser"
keywords = ["parser", "WebAssembly", "wasm"]
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasm-tools/tree/master/crates/wasmparser"
repository = "https://github.com/bytecodealliance/wasm-tools/tree/main/crates/wasmparser"
[[bench]]
name = "benchmark"
harness = false
[dev-dependencies.anyhow]
version = "1.0"

View file

@ -1,9 +1,15 @@
pub fn read_file_data(path: &PathBuf) -> Vec<u8> {
let mut data = Vec::new();
let mut f = File::open(path).ok().unwrap();
f.read_to_end(&mut data).unwrap();
data
}
#[macro_use]
extern crate criterion;
use anyhow::Result;
use criterion::{black_box, Criterion};
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use wasmparser::{
validate, OperatorValidatorConfig, Parser, ParserState, ValidatingParser,
ValidatingParserConfig, WasmDecoder,
};
const VALIDATOR_CONFIG: Option<ValidatingParserConfig> = Some(ValidatingParserConfig {
operator_config: OperatorValidatorConfig {
@ -12,81 +18,164 @@ const VALIDATOR_CONFIG: Option<ValidatingParserConfig> = Some(ValidatingParserCo
enable_simd: true,
enable_bulk_memory: true,
enable_multi_value: true,
enable_tail_call: true,
enable_module_linking: true,
},
});
#[macro_use]
extern crate criterion;
extern crate wasmparser;
/// A benchmark input.
pub struct BenchmarkInput {
/// The path to the benchmark file important for handling errors.
pub path: PathBuf,
/// The encoded Wasm module that is run by the benchmark.
pub wasm: Vec<u8>,
}
use criterion::Criterion;
use wasmparser::{
validate, OperatorValidatorConfig, Parser, ParserState, ValidatingParser,
ValidatingParserConfig, WasmDecoder,
};
impl BenchmarkInput {
/// Creates a new benchmark input.
pub fn new(test_path: PathBuf, encoded_wasm: Vec<u8>) -> Self {
Self {
path: test_path,
wasm: encoded_wasm,
}
}
}
use std::fs::{read_dir, File};
use std::io::Read;
use std::path::PathBuf;
/// Returns a vector of all found benchmark input files under the given directory.
///
/// Benchmark input files can be binary `.wasm` files, text-format `.wat`/`.txt` files,
/// or `.wast` files. For `.wast` files we pull out all the module directives and run
/// them in the benchmarks.
fn collect_test_files(path: &Path, list: &mut Vec<BenchmarkInput>) -> Result<()> {
for entry in path.read_dir()? {
let entry = entry?;
let path = entry.path();
if path.is_dir() {
collect_test_files(&path, list)?;
continue;
}
match path.extension().and_then(|ext| ext.to_str()) {
Some("wasm") => {
let wasm = fs::read(&path)?;
list.push(BenchmarkInput::new(path, wasm));
}
Some("wat") | Some("txt") => {
if let Ok(wasm) = wat::parse_file(&path) {
list.push(BenchmarkInput::new(path, wasm));
}
}
Some("wast") => {
let contents = fs::read_to_string(&path)?;
let buf = match wast::parser::ParseBuffer::new(&contents) {
Ok(buf) => buf,
Err(_) => continue,
};
let wast: wast::Wast<'_> = match wast::parser::parse(&buf) {
Ok(wast) => wast,
Err(_) => continue,
};
for directive in wast.directives {
match directive {
wast::WastDirective::Module(mut module) => {
let wasm = module.encode()?;
list.push(BenchmarkInput::new(path.clone(), wasm));
}
_ => continue,
}
}
}
_ => (),
}
}
Ok(())
}
fn read_all_wasm<'a, T>(mut d: T)
/// Reads the input given the Wasm parser or validator.
///
/// The `path` specifies which benchmark input file we are currently operating on
/// so that we can report better errors in case of failures.
fn read_all_wasm<'a, T>(path: &PathBuf, mut d: T)
where
T: WasmDecoder<'a>,
{
loop {
match *d.read() {
ParserState::Error(ref e) => panic!("unexpected error {}", e),
ParserState::Error(ref e) => {
panic!("unexpected error while reading Wasm from {:?}: {}", path, e)
}
ParserState::EndWasm => return,
_ => (),
}
}
}
/// Returns the default set of benchmark inputs used by the `wasmparser` benchmarks.
fn collect_benchmark_inputs() -> Vec<BenchmarkInput> {
let mut ret = Vec::new();
collect_test_files("../../tests".as_ref(), &mut ret).unwrap();
return ret;
}
fn it_works_benchmark(c: &mut Criterion) {
let mut data: Vec<Vec<u8>> = vec![];
for entry in read_dir("tests").unwrap() {
let dir = entry.unwrap();
if !dir.file_type().unwrap().is_file() {
continue;
let mut inputs = collect_benchmark_inputs();
// Filter out all benchmark inputs that fail to parse via `wasmparser`.
inputs.retain(|input| {
let mut parser = Parser::new(input.wasm.as_slice());
'outer: loop {
match parser.read() {
ParserState::Error(_) => break 'outer false,
ParserState::EndWasm => break 'outer true,
_ => continue,
}
}
data.push(read_file_data(&dir.path()));
}
});
c.bench_function("it works benchmark", move |b| {
for d in &mut data {
b.iter(|| read_all_wasm(Parser::new(d.as_slice())));
}
b.iter(|| {
for input in &mut inputs {
let _ = black_box(read_all_wasm(
&input.path,
Parser::new(input.wasm.as_slice()),
));
}
})
});
}
fn validator_not_fails_benchmark(c: &mut Criterion) {
let mut data: Vec<Vec<u8>> = vec![];
for entry in read_dir("tests").unwrap() {
let dir = entry.unwrap();
if !dir.file_type().unwrap().is_file() {
continue;
let mut inputs = collect_benchmark_inputs();
// Filter out all benchmark inputs that fail to validate via `wasmparser`.
inputs.retain(|input| {
let mut parser = ValidatingParser::new(input.wasm.as_slice(), VALIDATOR_CONFIG);
'outer: loop {
match parser.read() {
ParserState::Error(_) => break 'outer false,
ParserState::EndWasm => break 'outer true,
_ => continue,
}
}
data.push(read_file_data(&dir.path()));
}
});
c.bench_function("validator no fails benchmark", move |b| {
for d in &mut data {
b.iter(|| read_all_wasm(ValidatingParser::new(d.as_slice(), VALIDATOR_CONFIG)));
}
b.iter(|| {
for input in &mut inputs {
let _ = black_box(read_all_wasm(
&input.path,
ValidatingParser::new(input.wasm.as_slice(), VALIDATOR_CONFIG),
));
}
});
});
}
fn validate_benchmark(c: &mut Criterion) {
let mut data: Vec<Vec<u8>> = vec![vec![]];
for entry in read_dir("tests").unwrap() {
let dir = entry.unwrap();
if !dir.file_type().unwrap().is_file() {
continue;
}
data.push(read_file_data(&dir.path()));
}
let mut inputs = collect_benchmark_inputs();
// Filter out all benchmark inputs that fail to validate via `wasmparser`.
inputs.retain(|input| validate(input.wasm.as_slice(), VALIDATOR_CONFIG).is_ok());
c.bench_function("validate benchmark", move |b| {
for d in &mut data {
b.iter(|| validate(&d, VALIDATOR_CONFIG));
}
b.iter(|| {
for input in &mut inputs {
let _ = black_box(validate(input.wasm.as_slice(), VALIDATOR_CONFIG));
}
})
});
}
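The rewritten benchmark functions are presumably still registered with criterion in the unchanged tail of the file, which this hunk does not show. A conventional wiring sketch, assuming the group is named `benches`:

// Conventional criterion registration; the group name `benches` is an assumption.
criterion_group!(
    benches,
    it_works_benchmark,
    validator_not_fails_benchmark,
    validate_benchmark
);
criterion_main!(benches);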

View file

@ -1,12 +0,0 @@
#/bin/bash
# record current bench results
cargo bench --bench benchmark -- --noplot --save-baseline after
# switch to master and record its bench results
git checkout master && \
cargo bench --bench benchmark -- --noplot --save-baseline before
# compare
cargo install critcmp --force && \
critcmp before after

View file

@ -0,0 +1,10 @@
#!/usr/bin/env bash
set -e
# switch to main branch and record its bench results
git checkout main && \
cargo bench --bench benchmark -- --noplot --save-baseline before
# record current bench results
git checkout - && \
cargo bench --bench benchmark -- --noplot --baseline before

View file

@ -38,7 +38,7 @@ fn main() {
ref ty,
} => {
println!(
"ImportSectionEntry {{ module: \"{}\", field: \"{}\", ty: {:?} }}",
"ImportSectionEntry {{ module: \"{}\", field: {:?}, ty: {:?} }}",
module, field, ty
);
}

View file

@ -29,9 +29,11 @@ fn main() {
} => {
println!(" Export {} {:?}", field, kind);
}
ParserState::ImportSectionEntry { module, field, .. } => {
println!(" Import {}::{}", module, field)
}
ParserState::ImportSectionEntry {
module,
field: Some(field),
..
} => println!(" Import {}::{}", module, field),
ParserState::EndWasm => break,
ParserState::Error(ref err) => panic!("Error: {:?}", err),
_ => ( /* println!(" Other {:?}", state); */ ),

View file

@ -18,16 +18,14 @@ use std::convert::TryInto;
use std::str;
use std::vec::Vec;
use crate::limits::{
MAX_WASM_FUNCTION_LOCALS, MAX_WASM_FUNCTION_PARAMS, MAX_WASM_FUNCTION_RETURNS,
MAX_WASM_FUNCTION_SIZE, MAX_WASM_STRING_SIZE,
};
use crate::limits::*;
use crate::primitives::{
BinaryReaderError, BrTable, CustomSectionKind, ExternalKind, FuncType, GlobalType, Ieee32,
Ieee64, LinkingType, MemoryImmediate, MemoryType, NameType, Operator, RelocType,
ResizableLimits, Result, SIMDLaneIndex, SectionCode, TableType, Type, TypeOrFuncType, V128,
};
use crate::{ExportType, Import, ImportSectionEntryType, InstanceType, ModuleType};
const MAX_WASM_BR_TABLE_SIZE: usize = MAX_WASM_FUNCTION_SIZE;
@ -43,6 +41,7 @@ const WASM_MAGIC_NUMBER: &[u8; 4] = b"\0asm";
const WASM_EXPERIMENTAL_VERSION: u32 = 0xd;
const WASM_SUPPORTED_VERSION: u32 = 0x1;
#[derive(Clone)]
pub(crate) struct SectionHeader<'a> {
pub code: SectionCode<'a>,
pub payload_start: usize,
@ -235,6 +234,9 @@ impl<'a> BinaryReader<'a> {
1 => Ok(ExternalKind::Table),
2 => Ok(ExternalKind::Memory),
3 => Ok(ExternalKind::Global),
5 => Ok(ExternalKind::Module),
6 => Ok(ExternalKind::Instance),
7 => Ok(ExternalKind::Type),
_ => Err(BinaryReaderError::new(
"Invalid external kind",
self.original_position() - 1,
@ -243,13 +245,6 @@ impl<'a> BinaryReader<'a> {
}
pub(crate) fn read_func_type(&mut self) -> Result<FuncType> {
if self.read_type()? != Type::Func {
return Err(BinaryReaderError::new(
"type signature is not a func",
self.original_position() - 1,
));
}
let params_len = self.read_var_u32()? as usize;
if params_len > MAX_WASM_FUNCTION_PARAMS {
return Err(BinaryReaderError::new(
@ -278,6 +273,77 @@ impl<'a> BinaryReader<'a> {
})
}
pub(crate) fn read_module_type(&mut self) -> Result<ModuleType<'a>> {
let pos = self.original_position();
let imports_len = self.read_var_u32()? as usize;
if imports_len > MAX_WASM_IMPORTS {
return Err(BinaryReaderError::new("imports size is out of bounds", pos));
}
Ok(ModuleType {
imports: (0..imports_len)
.map(|_| self.read_import())
.collect::<Result<_>>()?,
exports: self.read_export_types()?,
})
}
pub(crate) fn read_instance_type(&mut self) -> Result<InstanceType<'a>> {
Ok(InstanceType {
exports: self.read_export_types()?,
})
}
fn read_export_types(&mut self) -> Result<Box<[ExportType<'a>]>> {
let pos = self.original_position();
let exports_len = self.read_var_u32()? as usize;
if exports_len > MAX_WASM_EXPORTS {
return Err(BinaryReaderError::new("exports size is out of bound", pos));
}
(0..exports_len).map(|_| self.read_export_type()).collect()
}
pub(crate) fn read_import(&mut self) -> Result<Import<'a>> {
let module = self.read_string()?;
// For the `field`, figure out whether this is the experimental encoding of
// single-level imports from the module linking proposal (a single-byte
// string 0xc0, which is invalid UTF-8) or whether there is a second
// level of import name.
let mut clone = self.clone();
let field = if clone.read_var_u32()? == 1 && clone.read_u8()? == 0xc0 {
*self = clone;
None
} else {
Some(self.read_string()?)
};
let ty = self.read_import_desc()?;
Ok(Import { module, field, ty })
}
pub(crate) fn read_export_type(&mut self) -> Result<ExportType<'a>> {
let name = self.read_string()?;
let ty = self.read_import_desc()?;
Ok(ExportType { name, ty })
}
pub(crate) fn read_import_desc(&mut self) -> Result<ImportSectionEntryType> {
Ok(match self.read_external_kind()? {
ExternalKind::Function => ImportSectionEntryType::Function(self.read_var_u32()?),
ExternalKind::Table => ImportSectionEntryType::Table(self.read_table_type()?),
ExternalKind::Memory => ImportSectionEntryType::Memory(self.read_memory_type()?),
ExternalKind::Global => ImportSectionEntryType::Global(self.read_global_type()?),
ExternalKind::Module => ImportSectionEntryType::Module(self.read_var_u32()?),
ExternalKind::Instance => ImportSectionEntryType::Instance(self.read_var_u32()?),
ExternalKind::Type => {
return Err(BinaryReaderError::new(
"cannot import types",
self.original_position() - 1,
))
}
})
}
fn read_resizable_limits(&mut self, max_present: bool) -> Result<ResizableLimits> {
let initial = self.read_var_u32()?;
let maximum = if max_present {
@ -362,6 +428,10 @@ impl<'a> BinaryReader<'a> {
10 => Ok(SectionCode::Code),
11 => Ok(SectionCode::Data),
12 => Ok(SectionCode::DataCount),
100 => Ok(SectionCode::Module),
101 => Ok(SectionCode::Instance),
102 => Ok(SectionCode::Alias),
103 => Ok(SectionCode::ModuleCode),
_ => Err(BinaryReaderError::new("Invalid section code", offset)),
}
}
@ -1526,6 +1596,7 @@ impl<'a> BinaryReader<'a> {
0x61 => Operator::I8x16Neg,
0x62 => Operator::I8x16AnyTrue,
0x63 => Operator::I8x16AllTrue,
0x64 => Operator::I8x16Bitmask,
0x65 => Operator::I8x16NarrowI16x8S,
0x66 => Operator::I8x16NarrowI16x8U,
0x6b => Operator::I8x16Shl,
@ -1546,6 +1617,7 @@ impl<'a> BinaryReader<'a> {
0x81 => Operator::I16x8Neg,
0x82 => Operator::I16x8AnyTrue,
0x83 => Operator::I16x8AllTrue,
0x84 => Operator::I16x8Bitmask,
0x85 => Operator::I16x8NarrowI32x4S,
0x86 => Operator::I16x8NarrowI32x4U,
0x87 => Operator::I16x8WidenLowI8x16S,
@ -1571,6 +1643,7 @@ impl<'a> BinaryReader<'a> {
0xa1 => Operator::I32x4Neg,
0xa2 => Operator::I32x4AnyTrue,
0xa3 => Operator::I32x4AllTrue,
0xa4 => Operator::I32x4Bitmask,
0xa7 => Operator::I32x4WidenLowI16x8S,
0xa8 => Operator::I32x4WidenHighI16x8S,
0xa9 => Operator::I32x4WidenLowI16x8U,
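`read_import` above probes for the module linking proposal's experimental single-level import encoding, where the field name is the one-byte string 0xc0 (deliberately invalid UTF-8). A simplified standalone sketch of that probe, assuming the string length fits in a single LEB128 byte; this is an illustration, not the vendored `BinaryReader` logic:

// Returns true when the next "field name" is the 0xc0 marker for a
// single-level (module-linking style) import, false for an ordinary name.
fn is_single_level_field(bytes: &[u8]) -> bool {
    bytes.len() >= 2 && bytes[0] == 0x01 && bytes[1] == 0xc0
}

fn main() {
    assert!(is_single_level_field(&[0x01, 0xc0])); // marker: no second name level
    assert!(!is_single_level_field(&[0x03, b'f', b'o', b'o'])); // ordinary field "foo"
}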

View file

@ -39,15 +39,18 @@ pub use crate::parser::WasmDecoder;
pub use crate::primitives::BinaryReaderError;
pub use crate::primitives::BrTable;
pub use crate::primitives::CustomSectionKind;
pub use crate::primitives::ExportType;
pub use crate::primitives::ExternalKind;
pub use crate::primitives::FuncType;
pub use crate::primitives::GlobalType;
pub use crate::primitives::Ieee32;
pub use crate::primitives::Ieee64;
pub use crate::primitives::ImportSectionEntryType;
pub use crate::primitives::InstanceType;
pub use crate::primitives::LinkingType;
pub use crate::primitives::MemoryImmediate;
pub use crate::primitives::MemoryType;
pub use crate::primitives::ModuleType;
pub use crate::primitives::NameType;
pub use crate::primitives::Naming;
pub use crate::primitives::Operator;
@ -57,6 +60,7 @@ pub use crate::primitives::Result;
pub use crate::primitives::SectionCode;
pub use crate::primitives::TableType;
pub use crate::primitives::Type;
pub use crate::primitives::TypeDef;
pub use crate::primitives::TypeOrFuncType;
pub use crate::primitives::V128;
@ -72,11 +76,15 @@ pub use crate::module_resources::WasmMemoryType;
pub use crate::module_resources::WasmModuleResources;
pub use crate::module_resources::WasmTableType;
pub use crate::module_resources::WasmType;
pub use crate::module_resources::WasmTypeDef;
pub(crate) use crate::module_resources::{wasm_func_type_inputs, wasm_func_type_outputs};
pub use crate::operators_validator::OperatorValidatorConfig;
pub use crate::readers::Alias;
pub use crate::readers::AliasSectionReader;
pub use crate::readers::AliasedInstance;
pub use crate::readers::CodeSectionReader;
pub use crate::readers::CustomSectionContent;
pub use crate::readers::Data;
@ -97,10 +105,13 @@ pub use crate::readers::GlobalSectionReader;
pub use crate::readers::Import;
pub use crate::readers::ImportSectionReader;
pub use crate::readers::InitExpr;
pub use crate::readers::InstanceSectionReader;
pub use crate::readers::LinkingSectionReader;
pub use crate::readers::LocalsReader;
pub use crate::readers::MemorySectionReader;
pub use crate::readers::ModuleCodeSectionReader;
pub use crate::readers::ModuleReader;
pub use crate::readers::ModuleSectionReader;
pub use crate::readers::Name;
pub use crate::readers::NameSectionReader;
pub use crate::readers::NamingReader;

View file

@ -17,8 +17,8 @@
// The limits are agreed upon with other engines for consistency.
pub const MAX_WASM_TYPES: usize = 1_000_000;
pub const MAX_WASM_FUNCTIONS: usize = 1_000_000;
pub const _MAX_WASM_IMPORTS: usize = 100_000;
pub const _MAX_WASM_EXPORTS: usize = 100_000;
pub const MAX_WASM_IMPORTS: usize = 100_000;
pub const MAX_WASM_EXPORTS: usize = 100_000;
pub const MAX_WASM_GLOBALS: usize = 1_000_000;
pub const _MAX_WASM_DATA_SEGMENTS: usize = 100_000;
pub const MAX_WASM_MEMORY_PAGES: usize = 65536;
@ -32,3 +32,5 @@ pub const _MAX_WASM_TABLE_SIZE: usize = 10_000_000;
pub const MAX_WASM_TABLE_ENTRIES: usize = 10_000_000;
pub const MAX_WASM_TABLES: usize = 1;
pub const MAX_WASM_MEMORIES: usize = 1;
pub const MAX_WASM_MODULES: usize = 1_000;
pub const MAX_WASM_INSTANCES: usize = 1_000;

View file

@ -13,6 +13,8 @@
* limitations under the License.
*/
use crate::{FuncType, TypeDef};
/// Types that qualify as Wasm types for validation purposes.
///
/// Must be comparable with `wasmparser` given Wasm types and
@ -26,6 +28,12 @@ pub trait WasmType: PartialEq<crate::Type> + PartialEq + Eq {
fn to_parser_type(&self) -> crate::Type;
}
pub trait WasmTypeDef {
type FuncType: WasmFuncType;
fn as_func(&self) -> Option<&Self::FuncType>;
}
/// Types that qualify as Wasm function types for validation purposes.
pub trait WasmFuncType {
/// A type that is comparable with Wasm types.
@ -277,7 +285,7 @@ pub trait WasmGlobalType {
/// the need of an additional parsing or validation step or copying data around.
pub trait WasmModuleResources {
/// The function type used for validation.
type FuncType: WasmFuncType;
type TypeDef: WasmTypeDef;
/// The table type used for validation.
type TableType: WasmTableType;
/// The memory type used for validation.
@ -286,7 +294,7 @@ pub trait WasmModuleResources {
type GlobalType: WasmGlobalType;
/// Returns the type at given index.
fn type_at(&self, at: u32) -> Option<&Self::FuncType>;
fn type_at(&self, at: u32) -> Option<&Self::TypeDef>;
/// Returns the table at given index if any.
fn table_at(&self, at: u32) -> Option<&Self::TableType>;
/// Returns the linear memory at given index.
@ -294,7 +302,7 @@ pub trait WasmModuleResources {
/// Returns the global variable at given index.
fn global_at(&self, at: u32) -> Option<&Self::GlobalType>;
/// Returns the function signature ID at given index.
fn func_type_id_at(&self, at: u32) -> Option<u32>;
fn func_type_at(&self, at: u32) -> Option<&<Self::TypeDef as WasmTypeDef>::FuncType>;
/// Returns the element type at the given index.
fn element_type_at(&self, at: u32) -> Option<crate::Type>;
@ -311,12 +319,12 @@ impl<T> WasmModuleResources for &'_ T
where
T: ?Sized + WasmModuleResources,
{
type FuncType = T::FuncType;
type TypeDef = T::TypeDef;
type TableType = T::TableType;
type MemoryType = T::MemoryType;
type GlobalType = T::GlobalType;
fn type_at(&self, at: u32) -> Option<&Self::FuncType> {
fn type_at(&self, at: u32) -> Option<&Self::TypeDef> {
T::type_at(self, at)
}
fn table_at(&self, at: u32) -> Option<&Self::TableType> {
@ -328,8 +336,8 @@ where
fn global_at(&self, at: u32) -> Option<&Self::GlobalType> {
T::global_at(self, at)
}
fn func_type_id_at(&self, at: u32) -> Option<u32> {
T::func_type_id_at(self, at)
fn func_type_at(&self, at: u32) -> Option<&<T::TypeDef as WasmTypeDef>::FuncType> {
T::func_type_at(self, at)
}
fn element_type_at(&self, at: u32) -> Option<crate::Type> {
T::element_type_at(self, at)
@ -352,6 +360,17 @@ impl WasmType for crate::Type {
}
}
impl<'a> WasmTypeDef for TypeDef<'a> {
type FuncType = FuncType;
fn as_func(&self) -> Option<&Self::FuncType> {
match self {
TypeDef::Func(f) => Some(f),
_ => None,
}
}
}
impl WasmFuncType for crate::FuncType {
type Type = crate::Type;
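With `type_at` now returning a general type definition, callers that need a function signature narrow it through `as_func()`, as the operator validator's `func_type_at` helper does. A standalone sketch of that lookup pattern with illustrative types, not the vendored wasmparser API:

struct FuncType;

enum TypeDef {
    Func(FuncType),
    Other,
}

impl TypeDef {
    // Narrow a general type definition down to a function type, if it is one.
    fn as_func(&self) -> Option<&FuncType> {
        match self {
            TypeDef::Func(f) => Some(f),
            _ => None,
        }
    }
}

fn func_type_at(types: &[TypeDef], at: usize) -> Option<&FuncType> {
    types.get(at)?.as_func()
}

fn main() {
    let types = [TypeDef::Func(FuncType), TypeDef::Other];
    assert!(func_type_at(&types, 0).is_some());
    assert!(func_type_at(&types, 1).is_none()); // type exists but is not a function
    assert!(func_type_at(&types, 2).is_none()); // index out of bounds
}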

View file

@ -18,7 +18,7 @@ use std::cmp::min;
use crate::primitives::{MemoryImmediate, Operator, SIMDLaneIndex, Type, TypeOrFuncType};
use crate::{
wasm_func_type_inputs, wasm_func_type_outputs, BinaryReaderError, WasmFuncType, WasmGlobalType,
WasmMemoryType, WasmModuleResources, WasmTableType, WasmType,
WasmModuleResources, WasmTableType, WasmType, WasmTypeDef,
};
#[derive(Debug)]
@ -113,28 +113,17 @@ impl FuncState {
}
Ok(())
}
fn push_block<F: WasmFuncType, T: WasmTableType, M: WasmMemoryType, G: WasmGlobalType>(
fn push_block(
&mut self,
ty: TypeOrFuncType,
block_type: BlockType,
resources: &dyn WasmModuleResources<
FuncType = F,
TableType = T,
MemoryType = M,
GlobalType = G,
>,
resources: impl WasmModuleResources,
) -> OperatorValidatorResult<()> {
let (start_types, return_types) = match ty {
TypeOrFuncType::Type(Type::EmptyBlockType) => (vec![], vec![]),
TypeOrFuncType::Type(ty) => (vec![], vec![ty]),
TypeOrFuncType::FuncType(idx) => {
let ty = resources
.type_at(idx)
// Note: This was an out-of-bounds memory access before
// the change to return `Option` at `type_at`. So
// I assumed that invalid indices at this point are
// bugs.
.expect("function type index is out of bounds");
let ty = func_type_at(&resources, idx)?;
(
wasm_func_type_inputs(ty)
.map(WasmType::to_parser_type)
@ -328,6 +317,7 @@ pub struct OperatorValidatorConfig {
pub enable_bulk_memory: bool,
pub enable_multi_value: bool,
pub enable_tail_call: bool,
pub enable_module_linking: bool,
#[cfg(feature = "deterministic")]
pub deterministic_only: bool,
@ -341,6 +331,7 @@ pub(crate) const DEFAULT_OPERATOR_VALIDATOR_CONFIG: OperatorValidatorConfig =
enable_bulk_memory: false,
enable_multi_value: true,
enable_tail_call: false,
enable_module_linking: false,
#[cfg(feature = "deterministic")]
deterministic_only: true,
@ -532,7 +523,7 @@ impl OperatorValidator {
function_index: u32,
resources: impl WasmModuleResources,
) -> OperatorValidatorResult<()> {
let type_index = match resources.func_type_id_at(function_index) {
let ty = match resources.func_type_at(function_index) {
Some(i) => i,
None => {
bail_op_err!(
@ -541,9 +532,6 @@ impl OperatorValidator {
);
}
};
let ty = resources
.type_at(type_index)
.expect("function type index is out of bounds");
self.check_operands(wasm_func_type_inputs(ty).map(WasmType::to_parser_type))?;
self.func_state.change_frame_with_types(
ty.len_inputs(),
@ -563,26 +551,18 @@ impl OperatorValidator {
"unknown table: table index out of bounds",
));
}
match resources.type_at(index) {
None => {
return Err(OperatorValidatorError::new(
"unknown type: type index out of bounds",
))
}
Some(ty) => {
let types = {
let mut types = Vec::with_capacity(ty.len_inputs() + 1);
types.extend(wasm_func_type_inputs(ty).map(WasmType::to_parser_type));
types.push(Type::I32);
types
};
self.check_operands(types.into_iter())?;
self.func_state.change_frame_with_types(
ty.len_inputs() + 1,
wasm_func_type_outputs(ty).map(WasmType::to_parser_type),
)?;
}
}
let ty = func_type_at(&resources, index)?;
let types = {
let mut types = Vec::with_capacity(ty.len_inputs() + 1);
types.extend(wasm_func_type_inputs(ty).map(WasmType::to_parser_type));
types.push(Type::I32);
types
};
self.check_operands(types.into_iter())?;
self.func_state.change_frame_with_types(
ty.len_inputs() + 1,
wasm_func_type_outputs(ty).map(WasmType::to_parser_type),
)?;
Ok(())
}
@ -674,20 +654,10 @@ impl OperatorValidator {
Ok(())
}
fn check_memory_index<
F: WasmFuncType,
T: WasmTableType,
M: WasmMemoryType,
G: WasmGlobalType,
>(
fn check_memory_index(
&self,
memory_index: u32,
resources: &dyn WasmModuleResources<
FuncType = F,
TableType = T,
MemoryType = M,
GlobalType = G,
>,
resources: impl WasmModuleResources,
) -> OperatorValidatorResult<()> {
if resources.memory_at(memory_index).is_none() {
bail_op_err!("unknown memory {}", memory_index);
@ -695,16 +665,11 @@ impl OperatorValidator {
Ok(())
}
fn check_memarg<F: WasmFuncType, T: WasmTableType, M: WasmMemoryType, G: WasmGlobalType>(
fn check_memarg(
&self,
memarg: MemoryImmediate,
max_align: u32,
resources: &dyn WasmModuleResources<
FuncType = F,
TableType = T,
MemoryType = M,
GlobalType = G,
>,
resources: impl WasmModuleResources,
) -> OperatorValidatorResult<()> {
self.check_memory_index(0, resources)?;
let align = memarg.flags;
@ -766,20 +731,10 @@ impl OperatorValidator {
Ok(())
}
fn check_shared_memarg_wo_align<
F: WasmFuncType,
T: WasmTableType,
M: WasmMemoryType,
G: WasmGlobalType,
>(
fn check_shared_memarg_wo_align(
&self,
_: MemoryImmediate,
resources: &dyn WasmModuleResources<
FuncType = F,
TableType = T,
MemoryType = M,
GlobalType = G,
>,
resources: impl WasmModuleResources,
) -> OperatorValidatorResult<()> {
self.check_memory_index(0, resources)?;
Ok(())
@ -792,15 +747,10 @@ impl OperatorValidator {
Ok(())
}
fn check_block_type<F: WasmFuncType, T: WasmTableType, M: WasmMemoryType, G: WasmGlobalType>(
fn check_block_type(
&self,
ty: TypeOrFuncType,
resources: &dyn WasmModuleResources<
FuncType = F,
TableType = T,
MemoryType = M,
GlobalType = G,
>,
resources: impl WasmModuleResources,
) -> OperatorValidatorResult<()> {
match ty {
TypeOrFuncType::Type(Type::EmptyBlockType)
@ -812,9 +762,9 @@ impl OperatorValidator {
self.check_reference_types_enabled()
}
TypeOrFuncType::Type(Type::V128) => self.check_simd_enabled(),
TypeOrFuncType::FuncType(idx) => match resources.type_at(idx) {
None => Err(OperatorValidatorError::new("type index out of bounds")),
Some(ty) if !self.config.enable_multi_value => {
TypeOrFuncType::FuncType(idx) => {
let ty = func_type_at(&resources, idx)?;
if !self.config.enable_multi_value {
if ty.len_outputs() > 1 {
return Err(OperatorValidatorError::new(
"blocks, loops, and ifs may only return at most one \
@ -827,38 +777,21 @@ impl OperatorValidator {
when multi-value is not enabled",
));
}
Ok(())
}
Some(_) => Ok(()),
},
Ok(())
}
_ => Err(OperatorValidatorError::new("invalid block return type")),
}
}
fn check_block_params<
F: WasmFuncType,
T: WasmTableType,
M: WasmMemoryType,
G: WasmGlobalType,
>(
fn check_block_params(
&self,
ty: TypeOrFuncType,
resources: &dyn WasmModuleResources<
FuncType = F,
TableType = T,
MemoryType = M,
GlobalType = G,
>,
resources: impl WasmModuleResources,
skip: usize,
) -> OperatorValidatorResult<()> {
if let TypeOrFuncType::FuncType(idx) = ty {
let func_ty = resources
.type_at(idx)
// Note: This was an out-of-bounds memory access before
// the change to return `Option` at `type_at`. So
// I assumed that invalid indices at this point are
// bugs.
.expect("function type index is out of bounds");
let func_ty = func_type_at(&resources, idx)?;
let len = func_ty.len_inputs();
self.check_frame_size(len + skip)?;
for (i, ty) in wasm_func_type_inputs(func_ty).enumerate() {
@ -908,20 +841,10 @@ impl OperatorValidator {
Ok(Some(ty))
}
pub(crate) fn process_operator<
F: WasmFuncType,
T: WasmTableType,
M: WasmMemoryType,
G: WasmGlobalType,
>(
pub(crate) fn process_operator(
&mut self,
operator: &Operator,
resources: &dyn WasmModuleResources<
FuncType = F,
TableType = T,
MemoryType = M,
GlobalType = G,
>,
resources: &impl WasmModuleResources,
) -> OperatorValidatorResult<FunctionEnd> {
if self.func_state.end_function {
return Err(OperatorValidatorError::new("unexpected operator"));
@ -1633,7 +1556,7 @@ impl OperatorValidator {
}
Operator::RefFunc { function_index } => {
self.check_reference_types_enabled()?;
if resources.func_type_id_at(function_index).is_none() {
if resources.func_type_at(function_index).is_none() {
return Err(OperatorValidatorError::new(
"unknown function: function index out of bounds",
));
@ -1903,10 +1826,13 @@ impl OperatorValidator {
}
Operator::I8x16AnyTrue
| Operator::I8x16AllTrue
| Operator::I8x16Bitmask
| Operator::I16x8AnyTrue
| Operator::I16x8AllTrue
| Operator::I16x8Bitmask
| Operator::I32x4AnyTrue
| Operator::I32x4AllTrue => {
| Operator::I32x4AllTrue
| Operator::I32x4Bitmask => {
self.check_simd_enabled()?;
self.check_operands_1(Type::V128)?;
self.func_state.change_frame_with_type(1, Type::I32)?;
@ -2096,3 +2022,19 @@ impl OperatorValidator {
Ok(())
}
}
fn func_type_at<T: WasmModuleResources>(
resources: &T,
at: u32,
) -> OperatorValidatorResult<&<T::TypeDef as WasmTypeDef>::FuncType> {
let ty = match resources.type_at(at) {
Some(ty) => ty,
None => {
return Err(OperatorValidatorError::new(
"unknown type: type index out of bounds",
))
}
};
ty.as_func()
.ok_or_else(|| OperatorValidatorError::new("type index not a function type"))
}

View file

@ -22,19 +22,12 @@ use crate::limits::{
};
use crate::primitives::{
BinaryReaderError, CustomSectionKind, ExternalKind, FuncType, GlobalType,
ImportSectionEntryType, LinkingType, MemoryType, Naming, Operator, RelocType, Result,
SectionCode, TableType, Type,
BinaryReaderError, CustomSectionKind, ExternalKind, GlobalType, ImportSectionEntryType,
LinkingType, MemoryType, Naming, Operator, RelocType, Result, SectionCode, TableType, Type,
TypeDef,
};
use crate::readers::{
CodeSectionReader, Data, DataKind, DataSectionReader, Element, ElementItem, ElementItems,
ElementKind, ElementSectionReader, Export, ExportSectionReader, FunctionBody,
FunctionSectionReader, Global, GlobalSectionReader, Import, ImportSectionReader,
LinkingSectionReader, MemorySectionReader, ModuleReader, Name, NameSectionReader, NamingReader,
OperatorsReader, Reloc, RelocSectionReader, Section, SectionReader, TableSectionReader,
TypeSectionReader,
};
use crate::readers::*;
use crate::binary_reader::{BinaryReader, Range};
@ -85,10 +78,10 @@ pub enum ParserState<'a> {
ReadingSectionRawData,
SectionRawData(&'a [u8]),
TypeSectionEntry(FuncType),
TypeSectionEntry(TypeDef<'a>),
ImportSectionEntry {
module: &'a str,
field: &'a str,
field: Option<&'a str>,
ty: ImportSectionEntryType,
},
FunctionSectionEntry(u32),
@ -140,6 +133,19 @@ pub enum ParserState<'a> {
LinkingSectionEntry(LinkingType),
SourceMappingURL(&'a str),
ModuleSectionEntry(u32),
AliasSectionEntry(Alias),
BeginInstantiate {
module: u32,
count: u32,
},
InstantiateParameter {
kind: ExternalKind,
index: u32,
},
EndInstantiate,
InlineModule(ModuleCode<'a>),
}
#[derive(Debug, Copy, Clone)]
@ -183,6 +189,10 @@ enum ParserSectionReader<'a> {
NameSectionReader(NameSectionReader<'a>),
LinkingSectionReader(LinkingSectionReader<'a>),
RelocSectionReader(RelocSectionReader<'a>),
ModuleSectionReader(ModuleSectionReader<'a>),
AliasSectionReader(AliasSectionReader<'a>),
InstanceSectionReader(InstanceSectionReader<'a>),
ModuleCodeSectionReader(ModuleCodeSectionReader<'a>),
}
macro_rules! section_reader {
@ -221,6 +231,7 @@ pub struct Parser<'a> {
current_data_segment: Option<&'a [u8]>,
binary_reader: Option<BinaryReader<'a>>,
operators_reader: Option<OperatorsReader<'a>>,
instance_args: Option<InstanceArgsReader<'a>>,
section_entries_left: u32,
}
@ -247,6 +258,7 @@ impl<'a> Parser<'a> {
current_data_segment: None,
binary_reader: None,
operators_reader: None,
instance_args: None,
section_entries_left: 0,
}
}
@ -681,6 +693,63 @@ impl<'a> Parser<'a> {
Ok(())
}
fn read_module_entry(&mut self) -> Result<()> {
if self.section_entries_left == 0 {
return self.check_section_end();
}
let module_ty = section_reader!(self, ModuleSectionReader).read()?;
self.state = ParserState::ModuleSectionEntry(module_ty);
self.section_entries_left -= 1;
Ok(())
}
fn read_alias_entry(&mut self) -> Result<()> {
if self.section_entries_left == 0 {
return self.check_section_end();
}
let alias_ty = section_reader!(self, AliasSectionReader).read()?;
self.state = ParserState::AliasSectionEntry(alias_ty);
self.section_entries_left -= 1;
Ok(())
}
fn read_instance_entry(&mut self) -> Result<()> {
if self.section_entries_left == 0 {
return self.check_section_end();
}
let instance = section_reader!(self, InstanceSectionReader).read()?;
let args = instance.args()?;
self.state = ParserState::BeginInstantiate {
module: instance.module(),
count: args.get_count(),
};
self.instance_args = Some(args);
self.section_entries_left -= 1;
Ok(())
}
fn read_instantiate_field(&mut self) -> Result<()> {
let instance = self.instance_args.as_mut().unwrap();
if instance.eof() {
self.instance_args = None;
self.state = ParserState::EndInstantiate;
} else {
let (kind, index) = self.instance_args.as_mut().unwrap().read()?;
self.state = ParserState::InstantiateParameter { kind, index };
}
Ok(())
}
fn read_module_code_entry(&mut self) -> Result<()> {
if self.section_entries_left == 0 {
return self.check_section_end();
}
let module = section_reader!(self, ModuleCodeSectionReader).read()?;
self.state = ParserState::InlineModule(module);
self.section_entries_left -= 1;
Ok(())
}
fn read_section_body(&mut self) -> Result<()> {
match self.state {
ParserState::BeginSection {
@ -775,6 +844,38 @@ impl<'a> Parser<'a> {
.get_data_count_section_content()?;
self.state = ParserState::DataCountSectionEntry(func_index);
}
ParserState::BeginSection {
code: SectionCode::Module,
..
} => {
start_section_reader!(self, ModuleSectionReader, get_module_section_reader);
self.read_module_entry()?;
}
ParserState::BeginSection {
code: SectionCode::Alias,
..
} => {
start_section_reader!(self, AliasSectionReader, get_alias_section_reader);
self.read_alias_entry()?;
}
ParserState::BeginSection {
code: SectionCode::Instance,
..
} => {
start_section_reader!(self, InstanceSectionReader, get_instance_section_reader);
self.read_instance_entry()?;
}
ParserState::BeginSection {
code: SectionCode::ModuleCode,
..
} => {
start_section_reader!(
self,
ModuleCodeSectionReader,
get_module_code_section_reader
);
self.read_module_code_entry()?;
}
ParserState::BeginSection {
code: SectionCode::Custom { .. },
..
@ -849,6 +950,10 @@ impl<'a> Parser<'a> {
ParserSectionReader::TypeSectionReader(ref reader) => reader.ensure_end()?,
ParserSectionReader::LinkingSectionReader(ref reader) => reader.ensure_end()?,
ParserSectionReader::RelocSectionReader(ref reader) => reader.ensure_end()?,
ParserSectionReader::ModuleSectionReader(ref reader) => reader.ensure_end()?,
ParserSectionReader::ModuleCodeSectionReader(ref reader) => reader.ensure_end()?,
ParserSectionReader::InstanceSectionReader(ref reader) => reader.ensure_end()?,
ParserSectionReader::AliasSectionReader(ref reader) => reader.ensure_end()?,
_ => unreachable!(),
}
self.position_to_section_end()
@ -984,6 +1089,13 @@ impl<'a> Parser<'a> {
ParserState::ReadingSectionRawData | ParserState::SectionRawData(_) => {
self.read_section_body_bytes()?
}
ParserState::ModuleSectionEntry(_) => self.read_module_entry()?,
ParserState::AliasSectionEntry(_) => self.read_alias_entry()?,
ParserState::BeginInstantiate { .. } | ParserState::InstantiateParameter { .. } => {
self.read_instantiate_field()?
}
ParserState::EndInstantiate => self.read_instance_entry()?,
ParserState::InlineModule(_) => self.read_module_code_entry()?,
}
Ok(())
}
@ -1085,7 +1197,7 @@ impl<'a> WasmDecoder<'a> for Parser<'a> {
/// # 0x80, 0x80, 0x0, 0x0, 0xa, 0x91, 0x80, 0x80, 0x80, 0x0,
/// # 0x2, 0x83, 0x80, 0x80, 0x80, 0x0, 0x0, 0x1, 0xb, 0x83,
/// # 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0xb];
/// use wasmparser::{WasmDecoder, Parser, ParserState};
/// use wasmparser::{WasmDecoder, Parser, ParserState, TypeDef};
/// let mut parser = Parser::new(data);
/// let mut types = Vec::new();
/// let mut function_types = Vec::new();
@ -1094,7 +1206,7 @@ impl<'a> WasmDecoder<'a> for Parser<'a> {
/// match parser.read() {
/// ParserState::Error(_) |
/// ParserState::EndWasm => break,
/// ParserState::TypeSectionEntry(ty) => {
/// ParserState::TypeSectionEntry(TypeDef::Func(ty)) => {
/// types.push(ty.clone());
/// }
/// ParserState::FunctionSectionEntry(id) => {
@ -1182,3 +1294,20 @@ impl<'a> WasmDecoder<'a> for Parser<'a> {
&self.state
}
}
impl<'a> From<ModuleReader<'a>> for Parser<'a> {
fn from(reader: ModuleReader<'a>) -> Parser<'a> {
let mut parser = Parser::default();
parser.state = ParserState::BeginWasm {
version: reader.get_version(),
};
parser.module_reader = Some(reader);
return parser;
}
}
impl<'a> Default for Parser<'a> {
fn default() -> Parser<'a> {
Parser::new(&[])
}
}
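A hedged usage sketch of the new module-linking parser states, in the same style as the doc example above; it assumes `data` holds a Wasm binary that uses the instance section:

use wasmparser::{Parser, ParserState, WasmDecoder};

// Counts instantiations and their arguments using the states added above.
fn count_instantiations(data: &[u8]) -> (usize, usize) {
    let mut parser = Parser::new(data);
    let (mut instances, mut args) = (0, 0);
    loop {
        match *parser.read() {
            ParserState::BeginInstantiate { .. } => instances += 1,
            ParserState::InstantiateParameter { .. } => args += 1,
            ParserState::EndWasm | ParserState::Error(_) => break,
            _ => (),
        }
    }
    (instances, args)
}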

View file

@ -83,18 +83,22 @@ pub enum SectionCode<'a> {
name: &'a str,
kind: CustomSectionKind,
},
Type, // Function signature declarations
Import, // Import declarations
Function, // Function declarations
Table, // Indirect function table and other tables
Memory, // Memory attributes
Global, // Global declarations
Export, // Exports
Start, // Start function declaration
Element, // Elements section
Code, // Function bodies (code)
Data, // Data segments
DataCount, // Count of passive data segments
Type, // Function signature declarations
Alias, // Aliased indices from nested/parent modules
Import, // Import declarations
Module, // Module declarations
Instance, // Instance definitions
Function, // Function declarations
Table, // Indirect function table and other tables
Memory, // Memory attributes
Global, // Global declarations
Export, // Exports
Start, // Start function declaration
Element, // Elements section
ModuleCode, // Module definitions
Code, // Function bodies (code)
Data, // Data segments
DataCount, // Count of passive data segments
}
/// Types as defined [here].
@ -135,6 +139,16 @@ pub enum ExternalKind {
Table,
Memory,
Global,
Type,
Module,
Instance,
}
#[derive(Debug, Clone)]
pub enum TypeDef<'a> {
Func(FuncType),
Instance(InstanceType<'a>),
Module(ModuleType<'a>),
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
@ -143,6 +157,23 @@ pub struct FuncType {
pub returns: Box<[Type]>,
}
#[derive(Debug, Clone)]
pub struct InstanceType<'a> {
pub exports: Box<[ExportType<'a>]>,
}
#[derive(Debug, Clone)]
pub struct ModuleType<'a> {
pub imports: Box<[crate::Import<'a>]>,
pub exports: Box<[ExportType<'a>]>,
}
#[derive(Debug, Clone)]
pub struct ExportType<'a> {
pub name: &'a str,
pub ty: ImportSectionEntryType,
}
#[derive(Debug, Copy, Clone)]
pub struct ResizableLimits {
pub initial: u32,
@ -161,7 +192,7 @@ pub struct MemoryType {
pub shared: bool,
}
#[derive(Debug, Copy, Clone)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct GlobalType {
pub content_type: Type,
pub mutable: bool,
@ -173,6 +204,8 @@ pub enum ImportSectionEntryType {
Table(TableType),
Memory(MemoryType),
Global(GlobalType),
Module(u32),
Instance(u32),
}
#[derive(Debug, Copy, Clone)]
@ -617,6 +650,7 @@ pub enum Operator<'a> {
I8x16Neg,
I8x16AnyTrue,
I8x16AllTrue,
I8x16Bitmask,
I8x16Shl,
I8x16ShrS,
I8x16ShrU,
@ -634,6 +668,7 @@ pub enum Operator<'a> {
I16x8Neg,
I16x8AnyTrue,
I16x8AllTrue,
I16x8Bitmask,
I16x8Shl,
I16x8ShrS,
I16x8ShrU,
@ -652,6 +687,7 @@ pub enum Operator<'a> {
I32x4Neg,
I32x4AnyTrue,
I32x4AllTrue,
I32x4Bitmask,
I32x4Shl,
I32x4ShrS,
I32x4ShrU,


@ -0,0 +1,84 @@
use crate::{
BinaryReader, BinaryReaderError, ExternalKind, Result, SectionIteratorLimited, SectionReader,
SectionWithLimitedItems,
};
pub struct AliasSectionReader<'a> {
reader: BinaryReader<'a>,
count: u32,
}
#[derive(Debug)]
pub struct Alias {
pub instance: AliasedInstance,
pub kind: ExternalKind,
pub index: u32,
}
#[derive(Debug)]
pub enum AliasedInstance {
Parent,
Child(u32),
}
impl<'a> AliasSectionReader<'a> {
pub fn new(data: &'a [u8], offset: usize) -> Result<AliasSectionReader<'a>> {
let mut reader = BinaryReader::new_with_offset(data, offset);
let count = reader.read_var_u32()?;
Ok(AliasSectionReader { reader, count })
}
pub fn original_position(&self) -> usize {
self.reader.original_position()
}
pub fn get_count(&self) -> u32 {
self.count
}
pub fn read(&mut self) -> Result<Alias> {
Ok(Alias {
instance: match self.reader.read_u8()? {
0x00 => AliasedInstance::Child(self.reader.read_var_u32()?),
0x01 => AliasedInstance::Parent,
_ => {
return Err(BinaryReaderError::new(
"invalid byte in alias",
self.original_position() - 1,
))
}
},
kind: self.reader.read_external_kind()?,
index: self.reader.read_var_u32()?,
})
}
}
impl<'a> SectionReader for AliasSectionReader<'a> {
type Item = Alias;
fn read(&mut self) -> Result<Self::Item> {
AliasSectionReader::read(self)
}
fn eof(&self) -> bool {
self.reader.eof()
}
fn original_position(&self) -> usize {
AliasSectionReader::original_position(self)
}
}
impl<'a> SectionWithLimitedItems for AliasSectionReader<'a> {
fn get_count(&self) -> u32 {
AliasSectionReader::get_count(self)
}
}
impl<'a> IntoIterator for AliasSectionReader<'a> {
type Item = Result<Alias>;
type IntoIter = SectionIteratorLimited<AliasSectionReader<'a>>;
fn into_iter(self) -> Self::IntoIter {
SectionIteratorLimited::new(self)
}
}
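
An illustrative sketch of consuming this reader (the helper is hypothetical, and `AliasSectionReader`/`AliasedInstance` are assumed to be re-exported at the crate root like the other section readers):

use wasmparser::{AliasSectionReader, AliasedInstance, Result};

// Hypothetical helper (illustration only): print every alias entry in a raw
// alias-section payload, distinguishing parent aliases from child-instance
// aliases.
fn dump_aliases(payload: &[u8], offset: usize) -> Result<()> {
    for alias in AliasSectionReader::new(payload, offset)? {
        let alias = alias?;
        match alias.instance {
            AliasedInstance::Parent => {
                println!("parent export: {:?} #{}", alias.kind, alias.index)
            }
            AliasedInstance::Child(instance) => {
                println!("instance {} export: {:?} #{}", instance, alias.kind, alias.index)
            }
        }
    }
    Ok(())
}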


@ -13,15 +13,15 @@
* limitations under the License.
*/
use super::{
BinaryReader, ExternalKind, ImportSectionEntryType, Result, SectionIteratorLimited,
SectionReader, SectionWithLimitedItems,
use crate::{
BinaryReader, ImportSectionEntryType, Result, SectionIteratorLimited, SectionReader,
SectionWithLimitedItems,
};
#[derive(Debug, Copy, Clone)]
pub struct Import<'a> {
pub module: &'a str,
pub field: &'a str,
pub field: Option<&'a str>,
pub ty: ImportSectionEntryType,
}
@ -67,16 +67,7 @@ impl<'a> ImportSectionReader<'a> {
where
'a: 'b,
{
let module = self.reader.read_string()?;
let field = self.reader.read_string()?;
let kind = self.reader.read_external_kind()?;
let ty = match kind {
ExternalKind::Function => ImportSectionEntryType::Function(self.reader.read_var_u32()?),
ExternalKind::Table => ImportSectionEntryType::Table(self.reader.read_table_type()?),
ExternalKind::Memory => ImportSectionEntryType::Memory(self.reader.read_memory_type()?),
ExternalKind::Global => ImportSectionEntryType::Global(self.reader.read_global_type()?),
};
Ok(Import { module, field, ty })
self.reader.read_import()
}
}


@ -0,0 +1,147 @@
use crate::{
BinaryReader, ExternalKind, Result, SectionIteratorLimited, SectionReader,
SectionWithLimitedItems,
};
pub struct InstanceSectionReader<'a> {
reader: BinaryReader<'a>,
count: u32,
}
impl<'a> InstanceSectionReader<'a> {
pub fn new(data: &'a [u8], offset: usize) -> Result<InstanceSectionReader<'a>> {
let mut reader = BinaryReader::new_with_offset(data, offset);
let count = reader.read_var_u32()?;
Ok(InstanceSectionReader { reader, count })
}
pub fn original_position(&self) -> usize {
self.reader.original_position()
}
pub fn get_count(&self) -> u32 {
self.count
}
pub fn read(&mut self) -> Result<Instance<'a>> {
let instance = Instance::new(
&self.reader.buffer[self.reader.position..],
self.original_position(),
)?;
self.reader.skip_var_32()?;
let count = self.reader.read_var_u32()?;
for _ in 0..count {
self.reader.skip_bytes(1)?;
self.reader.skip_var_32()?;
}
Ok(instance)
}
}
impl<'a> SectionReader for InstanceSectionReader<'a> {
type Item = Instance<'a>;
fn read(&mut self) -> Result<Self::Item> {
InstanceSectionReader::read(self)
}
fn eof(&self) -> bool {
self.reader.eof()
}
fn original_position(&self) -> usize {
InstanceSectionReader::original_position(self)
}
}
impl<'a> SectionWithLimitedItems for InstanceSectionReader<'a> {
fn get_count(&self) -> u32 {
InstanceSectionReader::get_count(self)
}
}
impl<'a> IntoIterator for InstanceSectionReader<'a> {
type Item = Result<Instance<'a>>;
type IntoIter = SectionIteratorLimited<InstanceSectionReader<'a>>;
fn into_iter(self) -> Self::IntoIter {
SectionIteratorLimited::new(self)
}
}
pub struct Instance<'a> {
reader: BinaryReader<'a>,
module: u32,
}
impl<'a> Instance<'a> {
pub fn new(data: &'a [u8], offset: usize) -> Result<Instance<'a>> {
let mut reader = BinaryReader::new_with_offset(data, offset);
let module = reader.read_var_u32()?;
Ok(Instance { module, reader })
}
pub fn original_position(&self) -> usize {
self.reader.original_position()
}
pub fn module(&self) -> u32 {
self.module
}
pub fn args(&self) -> Result<InstanceArgsReader<'a>> {
let mut reader = self.reader.clone();
let count = reader.read_var_u32()?;
Ok(InstanceArgsReader {
count,
remaining: count,
reader,
})
}
}
pub struct InstanceArgsReader<'a> {
reader: BinaryReader<'a>,
count: u32,
remaining: u32,
}
impl<'a> InstanceArgsReader<'a> {
pub fn original_position(&self) -> usize {
self.reader.original_position()
}
pub fn read(&mut self) -> Result<(ExternalKind, u32)> {
let kind = self.reader.read_external_kind()?;
let index = self.reader.read_var_u32()?;
self.remaining -= 1;
Ok((kind, index))
}
}
impl<'a> SectionReader for InstanceArgsReader<'a> {
type Item = (ExternalKind, u32);
fn read(&mut self) -> Result<Self::Item> {
InstanceArgsReader::read(self)
}
fn eof(&self) -> bool {
self.remaining == 0
}
fn original_position(&self) -> usize {
InstanceArgsReader::original_position(self)
}
}
impl<'a> SectionWithLimitedItems for InstanceArgsReader<'a> {
fn get_count(&self) -> u32 {
self.count
}
}
impl<'a> IntoIterator for InstanceArgsReader<'a> {
type Item = Result<(ExternalKind, u32)>;
type IntoIter = SectionIteratorLimited<InstanceArgsReader<'a>>;
fn into_iter(self) -> Self::IntoIter {
SectionIteratorLimited::new(self)
}
}
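
A sketch of how a consumer might walk this section (the helper is hypothetical; crate-root re-exports are assumed). Each entry names the module being instantiated plus the export kinds and indices passed as instantiation arguments:

use wasmparser::{InstanceSectionReader, Result};

// Hypothetical helper (illustration only): list each instantiation and its
// arguments from a raw instance-section payload.
fn dump_instances(payload: &[u8], offset: usize) -> Result<()> {
    let mut reader = InstanceSectionReader::new(payload, offset)?;
    for _ in 0..reader.get_count() {
        let instance = reader.read()?;
        println!("instantiate module {}", instance.module());
        for arg in instance.args()? {
            let (kind, index) = arg?;
            println!("  with {:?} #{}", kind, index);
        }
    }
    Ok(())
}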


@ -14,9 +14,8 @@
*/
use super::{
BinaryReader, BinaryReaderError, CustomSectionKind, ExternalKind, FuncType, GlobalType,
ImportSectionEntryType, LinkingType, MemoryType, NameType, Naming, Operator, Range, RelocType,
Result, SectionCode, TableType, Type,
BinaryReader, BinaryReaderError, CustomSectionKind, ExternalKind, GlobalType, LinkingType,
MemoryType, NameType, Naming, Operator, Range, RelocType, Result, SectionCode, TableType, Type,
};
use super::SectionHeader;
@ -56,6 +55,7 @@ pub use self::section_reader::SectionIteratorLimited;
pub use self::section_reader::SectionReader;
pub use self::section_reader::SectionWithLimitedItems;
pub use self::name_section::FunctionLocalReader;
pub use self::name_section::FunctionName;
pub use self::name_section::LocalName;
pub use self::name_section::ModuleName;
@ -77,6 +77,12 @@ use self::sourcemappingurl_section::read_sourcemappingurl_section_content;
pub use self::operators::OperatorsReader;
pub use self::alias_section::*;
pub use self::instance_section::*;
pub use self::module_code_section::*;
pub use self::module_section::*;
mod alias_section;
mod code_section;
mod data_count_section;
mod data_section;
@ -86,9 +92,12 @@ mod function_section;
mod global_section;
mod import_section;
mod init_expr;
mod instance_section;
mod linking_section;
mod memory_section;
mod module;
mod module_code_section;
mod module_section;
mod name_section;
mod operators;
mod producers_section;


@ -13,15 +13,18 @@
* limitations under the License.
*/
use std::fmt;
use super::{
BinaryReader, BinaryReaderError, CustomSectionKind, Range, Result, SectionCode, SectionHeader,
};
use super::{
read_data_count_section_content, read_sourcemappingurl_section_content,
read_start_section_content, CodeSectionReader, DataSectionReader, ElementSectionReader,
ExportSectionReader, FunctionSectionReader, GlobalSectionReader, ImportSectionReader,
LinkingSectionReader, MemorySectionReader, NameSectionReader, ProducersSectionReader,
read_start_section_content, AliasSectionReader, CodeSectionReader, DataSectionReader,
ElementSectionReader, ExportSectionReader, FunctionSectionReader, GlobalSectionReader,
ImportSectionReader, InstanceSectionReader, LinkingSectionReader, MemorySectionReader,
ModuleCodeSectionReader, ModuleSectionReader, NameSectionReader, ProducersSectionReader,
RelocSectionReader, TableSectionReader, TypeSectionReader,
};
@ -232,6 +235,46 @@ impl<'a> Section<'a> {
}
}
pub fn get_module_section_reader<'b>(&self) -> Result<ModuleSectionReader<'b>>
where
'a: 'b,
{
match self.code {
SectionCode::Module => ModuleSectionReader::new(self.data, self.offset),
_ => panic!("Invalid state for get_module_section_reader"),
}
}
pub fn get_alias_section_reader<'b>(&self) -> Result<AliasSectionReader<'b>>
where
'a: 'b,
{
match self.code {
SectionCode::Alias => AliasSectionReader::new(self.data, self.offset),
_ => panic!("Invalid state for get_alias_section_reader"),
}
}
pub fn get_instance_section_reader<'b>(&self) -> Result<InstanceSectionReader<'b>>
where
'a: 'b,
{
match self.code {
SectionCode::Instance => InstanceSectionReader::new(self.data, self.offset),
_ => panic!("Invalid state for get_instance_section_reader"),
}
}
pub fn get_module_code_section_reader<'b>(&self) -> Result<ModuleCodeSectionReader<'b>>
where
'a: 'b,
{
match self.code {
SectionCode::ModuleCode => ModuleCodeSectionReader::new(self.data, self.offset),
_ => panic!("Invalid state for get_module_code_section_reader"),
}
}
pub fn get_binary_reader<'b>(&self) -> BinaryReader<'b>
where
'a: 'b,
@ -239,6 +282,10 @@ impl<'a> Section<'a> {
BinaryReader::new_with_offset(self.data, self.offset)
}
pub fn content_raw(&self) -> &'a [u8] {
self.data
}
pub fn range(&self) -> Range {
Range {
start: self.offset,
@ -262,6 +309,12 @@ impl<'a> Section<'a> {
SectionCode::Table => SectionContent::Table(self.get_table_section_reader()?),
SectionCode::Element => SectionContent::Element(self.get_element_section_reader()?),
SectionCode::Start => SectionContent::Start(self.get_start_section_content()?),
SectionCode::Module => SectionContent::Module(self.get_module_section_reader()?),
SectionCode::Alias => SectionContent::Alias(self.get_alias_section_reader()?),
SectionCode::Instance => SectionContent::Instance(self.get_instance_section_reader()?),
SectionCode::ModuleCode => {
SectionContent::ModuleCode(self.get_module_code_section_reader()?)
}
SectionCode::DataCount => {
SectionContent::DataCount(self.get_data_count_section_content()?)
}
@ -323,6 +376,10 @@ pub enum SectionContent<'a> {
binary: BinaryReader<'a>,
content: Option<CustomSectionContent<'a>>,
},
Module(ModuleSectionReader<'a>),
Alias(AliasSectionReader<'a>),
Instance(InstanceSectionReader<'a>),
ModuleCode(ModuleCodeSectionReader<'a>),
}
pub enum CustomSectionContent<'a> {
@ -334,6 +391,7 @@ pub enum CustomSectionContent<'a> {
}
/// Reads top-level WebAssembly file structure: header and sections.
#[derive(Clone)]
pub struct ModuleReader<'a> {
reader: BinaryReader<'a>,
version: u32,
@ -342,7 +400,11 @@ pub struct ModuleReader<'a> {
impl<'a> ModuleReader<'a> {
pub fn new(data: &[u8]) -> Result<ModuleReader> {
let mut reader = BinaryReader::new(data);
ModuleReader::new_with_offset(data, 0)
}
pub(crate) fn new_with_offset(data: &[u8], offset: usize) -> Result<ModuleReader> {
let mut reader = BinaryReader::new_with_offset(data, offset);
let version = reader.read_file_header()?;
Ok(ModuleReader {
reader,
@ -362,6 +424,10 @@ impl<'a> ModuleReader<'a> {
}
}
pub fn original_position(&self) -> usize {
self.reader.original_position()
}
pub fn eof(&self) -> bool {
self.read_ahead.is_none() && self.reader.eof()
}
@ -412,11 +478,12 @@ impl<'a> ModuleReader<'a> {
};
let payload_end = payload_start + payload_len;
self.verify_section_end(payload_end)?;
let offset = self.reader.original_position();
let body_start = self.reader.position;
self.reader.skip_to(payload_end);
Ok(Section {
code,
offset: body_start,
offset,
data: &self.reader.buffer[body_start..payload_end],
})
}
@ -481,6 +548,14 @@ impl<'a> IntoIterator for ModuleReader<'a> {
}
}
impl<'a> fmt::Debug for ModuleReader<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("ModuleReader")
.field("version", &self.version)
.finish()
}
}
pub struct ModuleIterator<'a> {
reader: ModuleReader<'a>,
err: bool,


@ -0,0 +1,93 @@
use crate::{
BinaryReader, BinaryReaderError, ModuleReader, Result, SectionIteratorLimited, SectionReader,
SectionWithLimitedItems,
};
pub struct ModuleCodeSectionReader<'a> {
reader: BinaryReader<'a>,
count: u32,
}
#[derive(Debug)]
pub struct ModuleCode<'a> {
reader: BinaryReader<'a>,
}
impl<'a> ModuleCodeSectionReader<'a> {
pub fn new(data: &'a [u8], offset: usize) -> Result<ModuleCodeSectionReader<'a>> {
let mut reader = BinaryReader::new_with_offset(data, offset);
let count = reader.read_var_u32()?;
Ok(ModuleCodeSectionReader { reader, count })
}
pub fn original_position(&self) -> usize {
self.reader.original_position()
}
pub fn get_count(&self) -> u32 {
self.count
}
fn verify_module_end(&self, end: usize) -> Result<()> {
if self.reader.buffer.len() < end {
return Err(BinaryReaderError::new(
"module body extends past end of the module code section",
self.reader.original_offset + self.reader.buffer.len(),
));
}
Ok(())
}
pub fn read(&mut self) -> Result<ModuleCode<'a>> {
let size = self.reader.read_var_u32()? as usize;
let module_start = self.reader.position;
let module_end = module_start + size;
self.verify_module_end(module_end)?;
self.reader.skip_to(module_end);
Ok(ModuleCode {
reader: BinaryReader::new_with_offset(
&self.reader.buffer[module_start..module_end],
self.reader.original_offset + module_start,
),
})
}
}
impl<'a> SectionReader for ModuleCodeSectionReader<'a> {
type Item = ModuleCode<'a>;
fn read(&mut self) -> Result<Self::Item> {
ModuleCodeSectionReader::read(self)
}
fn eof(&self) -> bool {
self.reader.eof()
}
fn original_position(&self) -> usize {
ModuleCodeSectionReader::original_position(self)
}
}
impl<'a> SectionWithLimitedItems for ModuleCodeSectionReader<'a> {
fn get_count(&self) -> u32 {
ModuleCodeSectionReader::get_count(self)
}
}
impl<'a> IntoIterator for ModuleCodeSectionReader<'a> {
type Item = Result<ModuleCode<'a>>;
type IntoIter = SectionIteratorLimited<ModuleCodeSectionReader<'a>>;
fn into_iter(self) -> Self::IntoIter {
SectionIteratorLimited::new(self)
}
}
impl<'a> ModuleCode<'a> {
pub fn module(&self) -> Result<ModuleReader<'a>> {
ModuleReader::new_with_offset(self.reader.buffer, self.reader.original_position())
}
pub fn original_position(&self) -> usize {
self.reader.original_position()
}
}
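
As a hedged usage sketch (hypothetical helper, crate-root re-exports assumed): each entry in this section is an embedded module body whose contents can be re-parsed through the nested `ModuleReader` returned by `ModuleCode::module()`:

use wasmparser::{ModuleCodeSectionReader, Result};

// Hypothetical helper (illustration only): recurse into each inline module
// body and report the binary version read by the nested ModuleReader.
fn dump_inline_modules(payload: &[u8], offset: usize) -> Result<()> {
    for module in ModuleCodeSectionReader::new(payload, offset)? {
        let nested = module?.module()?;
        println!("inline module, version {}", nested.get_version());
    }
    Ok(())
}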


@ -0,0 +1,55 @@
use super::{BinaryReader, Result, SectionIteratorLimited, SectionReader, SectionWithLimitedItems};
pub struct ModuleSectionReader<'a> {
reader: BinaryReader<'a>,
count: u32,
}
impl<'a> ModuleSectionReader<'a> {
pub fn new(data: &'a [u8], offset: usize) -> Result<ModuleSectionReader<'a>> {
let mut reader = BinaryReader::new_with_offset(data, offset);
let count = reader.read_var_u32()?;
Ok(ModuleSectionReader { reader, count })
}
pub fn original_position(&self) -> usize {
self.reader.original_position()
}
pub fn get_count(&self) -> u32 {
self.count
}
pub fn read(&mut self) -> Result<u32> {
self.reader.read_var_u32()
}
}
impl<'a> SectionReader for ModuleSectionReader<'a> {
type Item = u32;
fn read(&mut self) -> Result<Self::Item> {
ModuleSectionReader::read(self)
}
fn eof(&self) -> bool {
self.reader.eof()
}
fn original_position(&self) -> usize {
ModuleSectionReader::original_position(self)
}
}
impl<'a> SectionWithLimitedItems for ModuleSectionReader<'a> {
fn get_count(&self) -> u32 {
ModuleSectionReader::get_count(self)
}
}
impl<'a> IntoIterator for ModuleSectionReader<'a> {
type Item = Result<u32>;
type IntoIter = SectionIteratorLimited<ModuleSectionReader<'a>>;
fn into_iter(self) -> Self::IntoIter {
SectionIteratorLimited::new(self)
}
}
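
A minimal sketch of reading this section (hypothetical helper, crate-root re-export assumed): the module section only records one type index per nested module declaration, so the reader yields plain u32 values that can be collected directly:

use wasmparser::{ModuleSectionReader, Result};

// Hypothetical helper (illustration only): gather the type index declared for
// each nested module.
fn module_type_indices(payload: &[u8], offset: usize) -> Result<Vec<u32>> {
    ModuleSectionReader::new(payload, offset)?.into_iter().collect()
}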


@ -31,6 +31,10 @@ impl<'a> ModuleName<'a> {
let mut reader = BinaryReader::new_with_offset(self.data, self.offset);
reader.read_string()
}
pub fn original_position(&self) -> usize {
self.offset
}
}
pub struct NamingReader<'a> {
@ -85,6 +89,10 @@ impl<'a> FunctionName<'a> {
{
NamingReader::new(self.data, self.offset)
}
pub fn original_position(&self) -> usize {
self.offset
}
}
#[derive(Debug, Copy, Clone)]
@ -101,6 +109,10 @@ impl<'a> FunctionLocalName<'a> {
{
NamingReader::new(self.data, self.offset)
}
pub fn original_position(&self) -> usize {
self.offset
}
}
pub struct FunctionLocalReader<'a> {
@ -152,6 +164,10 @@ impl<'a> LocalName<'a> {
{
FunctionLocalReader::new(self.data, self.offset)
}
pub fn original_position(&self) -> usize {
self.offset
}
}
#[derive(Debug, Copy, Clone)]


@ -13,8 +13,9 @@
* limitations under the License.
*/
use super::{
BinaryReader, FuncType, Result, SectionIteratorLimited, SectionReader, SectionWithLimitedItems,
use crate::{
BinaryReader, BinaryReaderError, Result, SectionIteratorLimited, SectionReader,
SectionWithLimitedItems, TypeDef,
};
pub struct TypeSectionReader<'a> {
@ -53,13 +54,23 @@ impl<'a> TypeSectionReader<'a> {
/// println!("Type {:?}", ty);
/// }
/// ```
pub fn read(&mut self) -> Result<FuncType> {
self.reader.read_func_type()
pub fn read(&mut self) -> Result<TypeDef<'a>> {
Ok(match self.reader.read_u8()? {
0x60 => TypeDef::Func(self.reader.read_func_type()?),
0x61 => TypeDef::Module(self.reader.read_module_type()?),
0x62 => TypeDef::Instance(self.reader.read_instance_type()?),
_ => {
return Err(BinaryReaderError::new(
"invalid leading byte in type definition",
self.original_position() - 1,
))
}
})
}
}
impl<'a> SectionReader for TypeSectionReader<'a> {
type Item = FuncType;
type Item = TypeDef<'a>;
fn read(&mut self) -> Result<Self::Item> {
TypeSectionReader::read(self)
}
@ -78,7 +89,7 @@ impl<'a> SectionWithLimitedItems for TypeSectionReader<'a> {
}
impl<'a> IntoIterator for TypeSectionReader<'a> {
type Item = Result<FuncType>;
type Item = Result<TypeDef<'a>>;
type IntoIter = SectionIteratorLimited<TypeSectionReader<'a>>;
/// Implements iterator over the type section.

File diff suppressed because it is too large