Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Missing optimization with optional pointers #20254

Open
vadim-za opened this issue Jun 10, 2024 · 2 comments
Open

Missing optimization with optional pointers #20254

vadim-za opened this issue Jun 10, 2024 · 2 comments
Milestone

Comments

@vadim-za
Copy link

Zig Version

0.13.0

Steps to Reproduce and Observed Behavior

Compile the following code in ReleaseFast mode

const std = @import("std");

const Data = struct { i: usize = 0 };
const A = struct { data: Data = .{} };
const B = struct { data: *Data };

inline fn getDataA(maybe_a: ?*A) ?*Data {
    return if (maybe_a) |a| &a.data else null;
}

inline fn getDataB(maybe_b: ?*B) ?*Data {
    return if (maybe_b) |b| b.data else null;
}

fn getA(maybe_a: ?*A) usize {
    return if (getDataA(maybe_a)) |data| data.i else 0xFFFF;
}

fn getB(maybe_b: ?*B) usize {
    return if (getDataB(maybe_b)) |data| data.i else 0xFFFF;
}

pub fn main() void {
    var a = A{};
    var b = B{ .data = &a.data };
    std.mem.doNotOptimizeAway(&a);
    std.mem.doNotOptimizeAway(&b);
    @breakpoint();
    std.mem.doNotOptimizeAway(@call(.never_inline, getA, .{&a}));
    std.mem.doNotOptimizeAway(@call(.never_inline, getB, .{&b}));
}

Check the disassembly for getA. On x64 it looks like

009F10A0: 48 85 C9                   test   rcx, rcx
009F10A3: 74 04                      je     0x9f10a9  ; <+9> at main.zig:16
009F10A5: 48 8B 01                   mov    rax, qword ptr [rcx]
009F10A8: C3                         ret    
009F10A9: B8 FF FF 00 00             mov    eax, 0xffff
009F10AE: C3                         ret

Check the disassembly for getB: On x64 it looks like

009F10B0: B8 FF FF 00 00             mov    eax, 0xffff
009F10B5: 48 85 C9                   test   rcx, rcx
009F10B8: 74 0B                      je     0x9f10c5  ; <+21> at main.zig:20
009F10BA: 48 8B 09                   mov    rcx, qword ptr [rcx]
009F10BD: 48 85 C9                   test   rcx, rcx
009F10C0: 74 03                      je     0x9f10c5  ; <+21> at main.zig:20
009F10C2: 48 8B 01                   mov    rax, qword ptr [rcx]
009F10C5: C3                         ret

Apparently there is a second unnecessary check for null in the code generated for getB.

Expected Behavior

In the code generation for getB the compiler is expected to infer that the second check is unnecessary, since the pointer retrieved from b.data is never null, same as in compilation of getA it inferred that the pointer obtained from &a.data is never null.

@vadim-za vadim-za added the bug Observed behavior contradicts documented or intended behavior label Jun 10, 2024
@vadim-za
Copy link
Author

vadim-za commented Jun 14, 2024

Here's an even simpler repro:

const std = @import("std");

const A = struct { data: usize = 0 };
const B = struct { data: *usize };

export fn getA(a: *A) usize {
    const maybe_data: ?*usize = &a.data;
    return if (maybe_data) |data| data.* else 0xFFFF;
}

export fn getB(b: *B) usize {
    const maybe_data: ?*usize = b.data;
    return if (maybe_data) |data| data.* else 0xFFFF;
}

pub fn main() void {
    var a = A{};
    var b = B{ .data = &a.data };
    std.mem.doNotOptimizeAway(&a);
    std.mem.doNotOptimizeAway(&b);
    @breakpoint();
    std.mem.doNotOptimizeAway(@call(.never_inline, getA, .{&a}));
    std.mem.doNotOptimizeAway(@call(.never_inline, getB, .{&b}));
}

The generated code for getA is

00C310A0: 48 8B 01                   mov    rax, qword ptr [rcx]
00C310A3: C3                         ret

The generated code for getB is

00C310B0: 48 8B 01                   mov    rax, qword ptr [rcx]
00C310B3: 48 85 C0                   test   rax, rax
00C310B6: 74 04                      je     0xc310bc  ; <+12> at main.zig:13
00C310B8: 48 8B 00                   mov    rax, qword ptr [rax]
00C310BB: C3                         ret    
00C310BC: B8 FF FF 00 00             mov    eax, 0xffff
00C310C1: C3                         ret

Why is there a comparison against null in getB? The variable maybe_data has been just assigned to a non-null value immediately above?

(Also I had to use the export keywords, otherwise getA gets fully inlined despite the .never_inline call attribute. But this keyword is not necessary for the reproduction of the problem in getB, the issue shows up the same).

@vadim-za
Copy link
Author

The issue seems to have wide-reaching implications in preventing efficient implementation of while loops using the standard iterator idiom while(it.next()). The problem occurs in cases where next() returns an optional pointer (which is pretty standard) and unless this optional pointer is available verbatim in the iterated data. Because any condition inside it.next(), which translates to returning a null vs. a non-null pointer, is evaluated twice. So, unless the optional pointer is unconditionally propagated by next() from the internal data structure, the check will be done twice in the generated code.

@ifreund ifreund added optimization and removed bug Observed behavior contradicts documented or intended behavior labels Jun 14, 2024
@ifreund ifreund added this to the 0.15.0 milestone Jun 14, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants