diff --git a/bpf_progs/bpf_net_helpers.h b/bpf_progs/bpf_net_helpers.h
index 0a31861b8f33cee10207b48645a666d312aac341..f3c7de513ddf85bc9d6ae509c8dd7103aecb47da 100644
--- a/bpf_progs/bpf_net_helpers.h
+++ b/bpf_progs/bpf_net_helpers.h
@@ -91,14 +91,14 @@ struct egress_bool { bool egress; };
 #define INGRESS ((struct egress_bool){ .egress = false })
 #define EGRESS ((struct egress_bool){ .egress = true })
 
-// constants for passing in to 'bool downstream'
-static const bool UPSTREAM = false;
-static const bool DOWNSTREAM = true;
+struct stream_bool { bool down; };
+#define UPSTREAM ((struct stream_bool){ .down = false })
+#define DOWNSTREAM ((struct stream_bool){ .down = true })
 
 struct rawip_bool { bool rawip; };
 #define ETHER ((struct rawip_bool){ .rawip = false })
 #define RAWIP ((struct rawip_bool){ .rawip = true })
 
-// constants for passing in to 'bool updatetime'
-static const bool NO_UPDATETIME = false;
-static const bool UPDATETIME = true;
+struct updatetime_bool { bool updatetime; };
+#define NO_UPDATETIME ((struct updatetime_bool){ .updatetime = false })
+#define UPDATETIME ((struct updatetime_bool){ .updatetime = true })
diff --git a/bpf_progs/offload.c b/bpf_progs/offload.c
index 3682ac58c6b74e87bc4486e2b8cb05a5f90ff715..35b8eea538e6d2a506c3ff412331e5455ccee26b 100644
--- a/bpf_progs/offload.c
+++ b/bpf_progs/offload.c
@@ -126,7 +126,7 @@ DEFINE_BPF_MAP_GRW(tether_upstream6_map, HASH, TetherUpstream6Key, Tether6Value,
 
 static inline __always_inline int do_forward6(struct __sk_buff* skb,
                                               const struct rawip_bool rawip,
-                                              const bool downstream,
+                                              const struct stream_bool stream,
                                               const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
 
@@ -188,7 +188,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         TC_PUNT(NON_GLOBAL_DST);
 
     // In the upstream direction do not forward traffic within the same /64 subnet.
-    if (!downstream && (src32 == dst32) && (ip6->saddr.s6_addr32[1] == ip6->daddr.s6_addr32[1]))
+    if (!stream.down && (src32 == dst32) && (ip6->saddr.s6_addr32[1] == ip6->daddr.s6_addr32[1]))
         TC_PUNT(LOCAL_SRC_DST);
 
     TetherDownstream6Key kd = {
@@ -200,15 +200,15 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
             .iif = skb->ifindex,
             .src64 = 0,
     };
-    if (is_ethernet) __builtin_memcpy(downstream ? kd.dstMac : ku.dstMac, eth->h_dest, ETH_ALEN);
+    if (is_ethernet) __builtin_memcpy(stream.down ? kd.dstMac : ku.dstMac, eth->h_dest, ETH_ALEN);
 
-    Tether6Value* v = downstream ? bpf_tether_downstream6_map_lookup_elem(&kd)
-                                 : bpf_tether_upstream6_map_lookup_elem(&ku);
+    Tether6Value* v = stream.down ? bpf_tether_downstream6_map_lookup_elem(&kd)
+                                  : bpf_tether_upstream6_map_lookup_elem(&ku);
 
     // If we don't find any offload information then simply let the core stack handle it...
     if (!v) return TC_ACT_PIPE;
 
-    uint32_t stat_and_limit_k = downstream ? skb->ifindex : v->oif;
+    uint32_t stat_and_limit_k = stream.down ? skb->ifindex : v->oif;
 
     TetherStatsValue* stat_v = bpf_tether_stats_map_lookup_elem(&stat_and_limit_k);
 
@@ -253,7 +253,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
         // We do this even if TX interface is RAWIP and thus does not need an ethernet header,
         // because this is easier and the kernel will strip extraneous ethernet header.
         if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_PUNT(CHANGE_HEAD_FAILED);
         }
 
@@ -265,7 +265,7 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
 
         // I do not believe this can ever happen, but keep the verifier happy...
         if (data + sizeof(struct ethhdr) + sizeof(*ip6) > data_end) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_DROP(TOO_SHORT);
         }
     };
@@ -285,8 +285,8 @@ static inline __always_inline int do_forward6(struct __sk_buff* skb,
     // (-ENOTSUPP) if it isn't.
     bpf_csum_update(skb, 0xFFFF - ntohs(old_hl) + ntohs(new_hl));
 
-    __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
-    __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
 
     // Overwrite any mac header with the new one
     // For a rawip tx interface it will simply be a bunch of zeroes and later stripped.
@@ -361,8 +361,8 @@ DEFINE_BPF_MAP_GRW(tether_upstream4_map, HASH, Tether4Key, Tether4Value, 1024, T
 static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         const int l2_header_size, void* data, const void* data_end,
         struct ethhdr* eth, struct iphdr* ip, const struct rawip_bool rawip,
-        const bool downstream, const bool updatetime, const bool is_tcp,
-        const struct kver_uint kver) {
+        const struct stream_bool stream, const struct updatetime_bool updatetime,
+        const bool is_tcp, const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
     struct tcphdr* tcph = is_tcp ? (void*)(ip + 1) : NULL;
     struct udphdr* udph = is_tcp ? NULL : (void*)(ip + 1);
@@ -421,13 +421,13 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
     };
     if (is_ethernet) __builtin_memcpy(k.dstMac, eth->h_dest, ETH_ALEN);
 
-    Tether4Value* v = downstream ? bpf_tether_downstream4_map_lookup_elem(&k)
-                                 : bpf_tether_upstream4_map_lookup_elem(&k);
+    Tether4Value* v = stream.down ? bpf_tether_downstream4_map_lookup_elem(&k)
+                                  : bpf_tether_upstream4_map_lookup_elem(&k);
 
     // If we don't find any offload information then simply let the core stack handle it...
     if (!v) return TC_ACT_PIPE;
 
-    uint32_t stat_and_limit_k = downstream ? skb->ifindex : v->oif;
+    uint32_t stat_and_limit_k = stream.down ? skb->ifindex : v->oif;
 
     TetherStatsValue* stat_v = bpf_tether_stats_map_lookup_elem(&stat_and_limit_k);
 
@@ -472,7 +472,7 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         // We do this even if TX interface is RAWIP and thus does not need an ethernet header,
         // because this is easier and the kernel will strip extraneous ethernet header.
         if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_PUNT(CHANGE_HEAD_FAILED);
         }
 
@@ -486,7 +486,7 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
         // I do not believe this can ever happen, but keep the verifier happy...
         if (data + sizeof(struct ethhdr) + sizeof(*ip) +
             (is_tcp ? sizeof(*tcph) : sizeof(*udph)) > data_end) {
-            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            __sync_fetch_and_add(stream.down ? &stat_v->rxErrors : &stat_v->txErrors, 1);
             TC_DROP(TOO_SHORT);
         }
     };
@@ -538,10 +538,10 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
 
     // This requires the bpf_ktime_get_boot_ns() helper which was added in 5.8,
     // and backported to all Android Common Kernel 4.14+ trees.
-    if (updatetime) v->last_used = bpf_ktime_get_boot_ns();
+    if (updatetime.updatetime) v->last_used = bpf_ktime_get_boot_ns();
 
-    __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
-    __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxPackets : &stat_v->txPackets, packets);
+    __sync_fetch_and_add(stream.down ? &stat_v->rxBytes : &stat_v->txBytes, L3_bytes);
 
     // Redirect to forwarded interface.
     //
@@ -554,8 +554,8 @@ static inline __always_inline int do_forward4_bottom(struct __sk_buff* skb,
 
 static inline __always_inline int do_forward4(struct __sk_buff* skb,
                                               const struct rawip_bool rawip,
-                                              const bool downstream,
-                                              const bool updatetime,
+                                              const struct stream_bool stream,
+                                              const struct updatetime_bool updatetime,
                                               const struct kver_uint kver) {
     const bool is_ethernet = !rawip.rawip;
 
@@ -616,16 +616,16 @@ static inline __always_inline int do_forward4(struct __sk_buff* skb,
     // in such a situation we can only support TCP. This also has the added nice benefit of
    // using a separate error counter, and thus making it obvious which version of the program
    // is loaded.
-    if (!updatetime && ip->protocol != IPPROTO_TCP) TC_PUNT(NON_TCP);
+    if (!updatetime.updatetime && ip->protocol != IPPROTO_TCP) TC_PUNT(NON_TCP);
 
     // We do not support offloading anything besides IPv4 TCP and UDP, due to need for NAT,
     // but no need to check this if !updatetime due to check immediately above.
-    if (updatetime && (ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
+    if (updatetime.updatetime && (ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
         TC_PUNT(NON_TCP_UDP);
 
     // We want to make sure that the compiler will, in the !updatetime case, entirely optimize
     // out all the non-tcp logic. Also note that at this point is_udp === !is_tcp.
-    const bool is_tcp = !updatetime || (ip->protocol == IPPROTO_TCP);
+    const bool is_tcp = !updatetime.updatetime || (ip->protocol == IPPROTO_TCP);
 
     // This is a bit of a hack to make things easier on the bpf verifier.
     // (In particular I believe the Linux 4.14 kernel's verifier can get confused later on about
@@ -646,10 +646,10 @@ static inline __always_inline int do_forward4(struct __sk_buff* skb,
     // if the underlying requisite kernel support (bpf_ktime_get_boot_ns) was backported.
     if (is_tcp) {
         return do_forward4_bottom(skb, l2_header_size, data, data_end, eth, ip,
-                                  rawip, downstream, updatetime, /* is_tcp */ true, kver);
+                                  rawip, stream, updatetime, /* is_tcp */ true, kver);
     } else {
         return do_forward4_bottom(skb, l2_header_size, data, data_end, eth, ip,
-                                  rawip, downstream, updatetime, /* is_tcp */ false, kver);
+                                  rawip, stream, updatetime, /* is_tcp */ false, kver);
     }
 }
 
@@ -808,16 +808,17 @@ DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_ether$stub", TETHERING_UID
 DEFINE_BPF_MAP_GRW(tether_dev_map, DEVMAP_HASH, uint32_t, uint32_t, 64, TETHERING_GID)
 
 static inline __always_inline int do_xdp_forward6(struct xdp_md *ctx, const struct rawip_bool rawip,
-                                                  const bool downstream) {
+                                                  const struct stream_bool stream) {
     return XDP_PASS;
 }
 
 static inline __always_inline int do_xdp_forward4(struct xdp_md *ctx, const struct rawip_bool rawip,
-                                                  const bool downstream) {
+                                                  const struct stream_bool stream) {
     return XDP_PASS;
 }
 
-static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx, const bool downstream) {
+static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx,
+                                                       const struct stream_bool stream) {
     const void* data = (void*)(long)ctx->data;
     const void* data_end = (void*)(long)ctx->data_end;
     const struct ethhdr* eth = data;
@@ -826,15 +827,16 @@ static inline __always_inline int do_xdp_forward_ether(struct xdp_md *ctx, const
     if ((void*)(eth + 1) > data_end) return XDP_PASS;
 
     if (eth->h_proto == htons(ETH_P_IPV6))
-        return do_xdp_forward6(ctx, ETHER, downstream);
+        return do_xdp_forward6(ctx, ETHER, stream);
     if (eth->h_proto == htons(ETH_P_IP))
-        return do_xdp_forward4(ctx, ETHER, downstream);
+        return do_xdp_forward4(ctx, ETHER, stream);
 
     // Anything else we don't know how to handle...
     return XDP_PASS;
 }
 
-static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx, const bool downstream) {
+static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx,
+                                                       const struct stream_bool stream) {
     const void* data = (void*)(long)ctx->data;
     const void* data_end = (void*)(long)ctx->data_end;
 
@@ -842,8 +844,8 @@ static inline __always_inline int do_xdp_forward_rawip(struct xdp_md *ctx, const
     if (data_end - data < 1) return XDP_PASS;
 
     const uint8_t v = (*(uint8_t*)data) >> 4;
-    if (v == 6) return do_xdp_forward6(ctx, RAWIP, downstream);
-    if (v == 4) return do_xdp_forward4(ctx, RAWIP, downstream);
+    if (v == 6) return do_xdp_forward6(ctx, RAWIP, stream);
+    if (v == 4) return do_xdp_forward4(ctx, RAWIP, stream);
 
     // Anything else we don't know how to handle...
     return XDP_PASS;
diff --git a/netbpfload/Android.bp b/netbpfload/Android.bp
index cc4f5d03c8ffd83417780398004d263dbed13364..d4e7ba8cb274a081da90b8ed128d23c29403c207 100644
--- a/netbpfload/Android.bp
+++ b/netbpfload/Android.bp
@@ -36,6 +36,4 @@ cc_binary {
         "loader.cpp",
         "NetBpfLoad.cpp",
     ],
-
-    init_rc: ["netbpfload.rc"],
 }
diff --git a/netbpfload/NetBpfLoad.cpp b/netbpfload/NetBpfLoad.cpp
index 7d9c48e612965cac16d821ea60309ddcc2e89615..b44a0bc71c2defb8dd7ada1823f3f947de84c64a 100644
--- a/netbpfload/NetBpfLoad.cpp
+++ b/netbpfload/NetBpfLoad.cpp
@@ -65,46 +65,34 @@ bool exists(const char* const path) {
     abort(); // can only hit this if permissions (likely selinux) are screwed up
 }
 
-constexpr unsigned long long kTetheringApexDomainBitmask =
-        domainToBitmask(domain::tethering) |
-        domainToBitmask(domain::net_private) |
-        domainToBitmask(domain::net_shared) |
-        domainToBitmask(domain::netd_readonly) |
-        domainToBitmask(domain::netd_shared);
-
 const android::bpf::Location locations[] = {
         // S+ Tethering mainline module (network_stack): tether offload
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/",
                 .prefix = "tethering/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with netd & system server)
         // netutils_wrapper (for iptables xt_bpf) has access to programs
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/netd_shared/",
                 .prefix = "netd_shared/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with netd & system server)
         // netutils_wrapper has no access, netd has read only access
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/netd_readonly/",
                 .prefix = "netd_readonly/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (shared with system server)
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/net_shared/",
                 .prefix = "net_shared/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
         // T+ Tethering mainline module (not shared, just network_stack)
         {
                 .dir = "/apex/com.android.tethering/etc/bpf/net_private/",
                 .prefix = "net_private/",
-                .allowedDomainBitmask = kTetheringApexDomainBitmask,
         },
 };
 
@@ -247,13 +235,6 @@ int main(int argc, char** argv) {
         if (createSysFsBpfSubDir(location.prefix)) return 1;
     }
 
-    // Note: there's no actual src dir for fs_bpf_loader .o's,
-    // so it is not listed in 'locations[].prefix'.
-    // This is because this is primarily meant for triggering genfscon rules,
-    // and as such this will likely always be the case.
-    // Thus we need to manually create the /sys/fs/bpf/loader subdirectory.
- if (createSysFsBpfSubDir("loader")) return 1; - // Load all ELF objects, create programs and maps, and pin them for (const auto& location : locations) { if (loadAllElfObjects(location) != 0) { diff --git a/netbpfload/loader.cpp b/netbpfload/loader.cpp index 9aeb184b0539ea6e3079e8e1deca331fa0bdbf4b..c534b2cecc0ddce8063719c9e40c8fd5c727fbc5 100644 --- a/netbpfload/loader.cpp +++ b/netbpfload/loader.cpp @@ -621,8 +621,7 @@ static bool mapMatchesExpectations(const unique_fd& fd, const string& mapName, } static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>& mapFds, - const char* prefix, const unsigned long long allowedDomainBitmask, - const size_t sizeOfBpfMapDef) { + const char* prefix, const size_t sizeOfBpfMapDef) { int ret; vector<char> mdData; vector<struct bpf_map_def> md; @@ -733,11 +732,6 @@ static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>& domain selinux_context = getDomainFromSelinuxContext(md[i].selinux_context); if (specified(selinux_context)) { - if (!inDomainBitmask(selinux_context, allowedDomainBitmask)) { - ALOGE("map %s has invalid selinux_context of %d (allowed bitmask 0x%llx)", - mapNames[i].c_str(), selinux_context, allowedDomainBitmask); - return -EINVAL; - } ALOGI("map %s selinux_context [%-32s] -> %d -> '%s' (%s)", mapNames[i].c_str(), md[i].selinux_context, selinux_context, lookupSelinuxContext(selinux_context), lookupPinSubdir(selinux_context)); @@ -746,11 +740,6 @@ static int createMaps(const char* elfPath, ifstream& elfFile, vector<unique_fd>& domain pin_subdir = getDomainFromPinSubdir(md[i].pin_subdir); if (unrecognized(pin_subdir)) return -ENOTDIR; if (specified(pin_subdir)) { - if (!inDomainBitmask(pin_subdir, allowedDomainBitmask)) { - ALOGE("map %s has invalid pin_subdir of %d (allowed bitmask 0x%llx)", - mapNames[i].c_str(), pin_subdir, allowedDomainBitmask); - return -EINVAL; - } ALOGI("map %s pin_subdir [%-32s] -> %d -> '%s'", mapNames[i].c_str(), md[i].pin_subdir, pin_subdir, lookupPinSubdir(pin_subdir)); } @@ -921,7 +910,7 @@ static void applyMapRelo(ifstream& elfFile, vector<unique_fd> &mapFds, vector<co } static int loadCodeSections(const char* elfPath, vector<codeSection>& cs, const string& license, - const char* prefix, const unsigned long long allowedDomainBitmask) { + const char* prefix) { unsigned kvers = kernelVersion(); if (!kvers) { @@ -980,22 +969,12 @@ static int loadCodeSections(const char* elfPath, vector<codeSection>& cs, const if (unrecognized(pin_subdir)) return -ENOTDIR; if (specified(selinux_context)) { - if (!inDomainBitmask(selinux_context, allowedDomainBitmask)) { - ALOGE("prog %s has invalid selinux_context of %d (allowed bitmask 0x%llx)", - name.c_str(), selinux_context, allowedDomainBitmask); - return -EINVAL; - } ALOGI("prog %s selinux_context [%-32s] -> %d -> '%s' (%s)", name.c_str(), cs[i].prog_def->selinux_context, selinux_context, lookupSelinuxContext(selinux_context), lookupPinSubdir(selinux_context)); } if (specified(pin_subdir)) { - if (!inDomainBitmask(pin_subdir, allowedDomainBitmask)) { - ALOGE("prog %s has invalid pin_subdir of %d (allowed bitmask 0x%llx)", name.c_str(), - pin_subdir, allowedDomainBitmask); - return -EINVAL; - } ALOGI("prog %s pin_subdir [%-32s] -> %d -> '%s'", name.c_str(), cs[i].prog_def->pin_subdir, pin_subdir, lookupPinSubdir(pin_subdir)); } @@ -1185,8 +1164,7 @@ int loadProg(const char* elfPath, bool* isCritical, const Location& location) { /* Just for future debugging */ if (0) dumpAllCs(cs); - ret = createMaps(elfPath, elfFile, mapFds, 
location.prefix, location.allowedDomainBitmask, - sizeOfBpfMapDef); + ret = createMaps(elfPath, elfFile, mapFds, location.prefix, sizeOfBpfMapDef); if (ret) { ALOGE("Failed to create maps: (ret=%d) in %s", ret, elfPath); return ret; @@ -1197,8 +1175,7 @@ int loadProg(const char* elfPath, bool* isCritical, const Location& location) { applyMapRelo(elfFile, mapFds, cs); - ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix, - location.allowedDomainBitmask); + ret = loadCodeSections(elfPath, cs, string(license.data()), location.prefix); if (ret) ALOGE("Failed to load programs, loadCodeSections ret=%d", ret); return ret; diff --git a/netbpfload/loader.h b/netbpfload/loader.h index 6402ceafac88630d8379aafa873476ccb94d413a..b8846376936c136f6722e9530d18914f42f511dd 100644 --- a/netbpfload/loader.h +++ b/netbpfload/loader.h @@ -64,18 +64,9 @@ static constexpr bool specified(domain d) { return d != domain::unspecified; } -static constexpr unsigned long long domainToBitmask(domain d) { - return specified(d) ? 1uLL << (static_cast<int>(d) - 1) : 0; -} - -static constexpr bool inDomainBitmask(domain d, unsigned long long v) { - return domainToBitmask(d) & v; -} - struct Location { const char* const dir = ""; const char* const prefix = ""; - unsigned long long allowedDomainBitmask = 0; }; // BPF loader implementation. Loads an eBPF ELF object diff --git a/netbpfload/netbpfload.rc b/netbpfload/netbpfload.rc deleted file mode 100644 index 20fbb9f01bb5a86ef2e558e6d8a03aa63a01cbe5..0000000000000000000000000000000000000000 --- a/netbpfload/netbpfload.rc +++ /dev/null @@ -1,85 +0,0 @@ -# zygote-start is what officially starts netd (see //system/core/rootdir/init.rc) -# However, on some hardware it's started from post-fs-data as well, which is just -# a tad earlier. There's no benefit to that though, since on 4.9+ P+ devices netd -# will just block until bpfloader finishes and sets the bpf.progs_loaded property. -# -# It is important that we start netbpfload after: -# - /sys/fs/bpf is already mounted, -# - apex (incl. rollback) is initialized (so that in the future we can load bpf -# programs shipped as part of apex mainline modules) -# - logd is ready for us to log stuff -# -# At the same time we want to be as early as possible to reduce races and thus -# failures (before memory is fragmented, and cpu is busy running tons of other -# stuff) and we absolutely want to be before netd and the system boot slot is -# considered to have booted successfully. -# -on load_bpf_programs - exec_start netbpfload - -service netbpfload /system/bin/netbpfload - capabilities CHOWN SYS_ADMIN NET_ADMIN - # The following group memberships are a workaround for lack of DAC_OVERRIDE - # and allow us to open (among other things) files that we created and are - # no longer root owned (due to CHOWN) but still have group read access to - # one of the following groups. This is not perfect, but a more correct - # solution requires significantly more effort to implement. - group root graphics network_stack net_admin net_bw_acct net_bw_stats net_raw system - user root - # - # Set RLIMIT_MEMLOCK to 1GiB for netbpfload - # - # Actually only 8MiB would be needed if netbpfload ran as its own uid. - # - # However, while the rlimit is per-thread, the accounting is system wide. - # So, for example, if the graphics stack has already allocated 10MiB of - # memlock data before netbpfload even gets a chance to run, it would fail - # if its memlock rlimit is only 8MiB - since there would be none left for it. 
-    #
-    # netbpfload succeeding is critical to system health, since a failure will
-    # cause netd crashloop and thus system server crashloop... and the only
-    # recovery is a full kernel reboot.
-    #
-    # We've had issues where devices would sometimes (rarely) boot into
-    # a crashloop because netbpfload would occasionally lose a boot time
-    # race against the graphics stack's boot time locked memory allocation.
-    #
-    # Thus netbpfload's memlock has to be 8MB higher then the locked memory
-    # consumption of the root uid anywhere else in the system...
-    # But we don't know what that is for all possible devices...
-    #
-    # Ideally, we'd simply grant netbpfload the IPC_LOCK capability and it
-    # would simply ignore it's memlock rlimit... but it turns that this
-    # capability is not even checked by the kernel's bpf system call.
-    #
-    # As such we simply use 1GiB as a reasonable approximation of infinity.
-    #
-    rlimit memlock 1073741824 1073741824
-    oneshot
-    #
-    # How to debug bootloops caused by 'netbpfload-failed'.
-    #
-    # 1. On some lower RAM devices (like wembley) you may need to first enable developer mode
-    #    (from the Settings app UI), and change the developer option "Logger buffer sizes"
-    #    from the default (wembley: 64kB) to the maximum (1M) per log buffer.
-    #    Otherwise buffer will overflow before you manage to dump it and you'll get useless logs.
-    #
-    # 2. comment out 'reboot_on_failure reboot,netbpfload-failed' below
-    # 3. rebuild/reflash/reboot
-    # 4. as the device is booting up capture netbpfload logs via:
-    #    adb logcat -s 'NetBpfLoad:*' 'NetBpfLoader:*'
-    #
-    #    something like:
-    #    $ adb reboot; sleep 1; adb wait-for-device; adb root; sleep 1; adb wait-for-device; adb logcat -s 'NetBpfLoad:*' 'NetBpfLoader:*'
-    #    will take care of capturing logs as early as possible
-    #
-    # 5. look through the logs from the kernel's bpf verifier that netbpfload dumps out,
-    #    it usually makes sense to search back from the end and find the particular
-    #    bpf verifier failure that caused netbpfload to terminate early with an error code.
-    #    This will probably be something along the lines of 'too many jumps' or
-    #    'cannot prove return value is 0 or 1' or 'unsupported / unknown operation / helper',
-    #    'invalid bpf_context access', etc.
-    #
-    reboot_on_failure reboot,netbpfload-failed
-    # we're not really updatable, but want to be able to load bpf programs shipped in apexes
-    updatable
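
Note on the bpf_net_helpers.h change above: wrapping a bare bool parameter in a
single-field struct (stream_bool, updatetime_bool, matching the pre-existing
rawip_bool/egress_bool) turns an accidentally transposed boolean argument from a
silent implicit conversion into a hard compile error. A minimal standalone sketch
of the idiom follows; the file name, forward(), and main() here are hypothetical
illustration only, not part of the patch:

    /* bool_wrapper_demo.c - sketch of the wrapper-struct idiom used above */
    #include <stdbool.h>
    #include <stdio.h>

    struct stream_bool { bool down; };           /* traffic direction        */
    struct updatetime_bool { bool updatetime; }; /* refresh last_used stamp? */

    #define DOWNSTREAM ((struct stream_bool){ .down = true })
    #define UPDATETIME ((struct updatetime_bool){ .updatetime = true })

    /* With 'bool down, bool update' a swapped call forward(update, down)
     * would still compile; with distinct struct types it cannot. */
    static void forward(struct stream_bool stream, struct updatetime_bool update) {
        printf("down=%d updatetime=%d\n", stream.down, update.updatetime);
    }

    int main(void) {
        forward(DOWNSTREAM, UPDATETIME);      /* ok */
        /* forward(UPDATETIME, DOWNSTREAM); -- error: incompatible types */
        return 0;
    }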