From 85462b8b429bbd0bd5b56ac58ea861b9eef32a2e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Sat, 18 Jul 2020 08:53:44 +0200 Subject: [PATCH 01/34] ipq807x: add the Qualcomm AX target support Signed-off-by: John Crispin --- config/Config-kernel.in | 9 + include/image.mk | 6 +- include/kernel-version.mk | 2 +- package/boot/uboot-envtools/files/ipq807x | 37 + .../etc/hotplug.d/firmware/11-ath10k-caldata | 5 + target/linux/ipq807x/109-logspam.patch | 24 + target/linux/ipq807x/Makefile | 22 + .../ipq807x/base-files/etc/board.d/01_leds | 38 + .../ipq807x/base-files/etc/board.d/02_network | 80 + .../etc/hotplug.d/firmware/10-ath11k-caldata | 95 + .../ipq807x/base-files/etc/hotplug.d/net/macs | 3 + .../ipq807x/base-files/etc/init.d/aq_phy | 16 + .../ipq807x/base-files/etc/init.d/bootcount | 12 + .../linux/ipq807x/base-files/etc/init.d/wdt | 14 + ...G4_v5.4.B-AQR_CIG_WIFI_ID44715_VER1673.cld | Bin 0 -> 391170 bytes .../base-files/lib/upgrade/platform.sh | 72 + target/linux/ipq807x/config-4.4 | 828 + .../arm/boot/dts/qcom-ipq6018-cig-wf188.dts | 18 + .../arm/boot/dts/qcom-ipq6018-cig-wf188n.dts | 18 + .../boot/dts/qcom-ipq6018-edgecore-eap101.dts | 18 + .../boot/dts/qcom-ipq6018-miwifi-ax1800.dts | 18 + .../boot/dts/qcom-ipq6018-wallys-dr6018.dts | 18 + .../arch/arm/boot/dts/qcom-ipq807x-eap102.dts | 26 + .../arch/arm/boot/dts/qcom-ipq807x-eap106.dts | 26 + .../arch/arm/boot/dts/qcom-ipq807x-ex227.dts | 26 + .../arch/arm/boot/dts/qcom-ipq807x-ex447.dts | 26 + .../boot/dts/qcom-ipq807x-sercomm-wallaby.dts | 26 + .../arch/arm/boot/dts/qcom-ipq807x-wf194c.dts | 26 + .../arm/boot/dts/qcom-ipq807x-wf194c4.dts | 26 + .../dts/qcom/qcom-ipq6018-miwifi-ax1800.dts | 419 + .../dts/qcom/qcom-ipq6018-wallys-dr6018.dts | 441 + .../boot/dts/qcom/qcom-ipq807x-eap102.dts | 918 + .../boot/dts/qcom/qcom-ipq807x-wf194c4.dts | 942 + target/linux/ipq807x/image/Makefile | 26 + target/linux/ipq807x/image/ipq50xx.mk | 10 + target/linux/ipq807x/image/ipq60xx.mk | 56 + target/linux/ipq807x/image/ipq807x.mk | 90 + target/linux/ipq807x/ipq50xx/config-default | 84 + target/linux/ipq807x/ipq50xx/config-lowmem | 73 + target/linux/ipq807x/ipq50xx/target.mk | 10 + target/linux/ipq807x/ipq60xx/config-default | 122 + .../linux/ipq807x/ipq60xx/profiles/default.mk | 9 + target/linux/ipq807x/ipq60xx/target.mk | 8 + target/linux/ipq807x/ipq807x/config-default | 78 + .../linux/ipq807x/ipq807x/profiles/default.mk | 9 + target/linux/ipq807x/ipq807x/target.mk | 7 + target/linux/ipq807x/modules.mk | 61 + .../linux/ipq807x/patches/100-qrtr-ns.patch | 976 + .../linux/ipq807x/patches/101-squashfs.patch | 16 + .../linux/ipq807x/patches/102-cig-wf188.patch | 869 + .../ipq807x/patches/103-sercomm-wallaby.patch | 816 + target/linux/ipq807x/patches/104-wf194c.patch | 816 + .../patches/105-fix-dtc-gcc10-build.patch | 11 + target/linux/ipq807x/patches/106-eap101.patch | 993 + .../linux/ipq807x/patches/108-log-spam.patch | 37 + target/linux/ipq807x/patches/109-tplink.patch | 1518 + .../ipq807x/patches/110-add-esmt-nand.patch | 37 + target/linux/ipq807x/patches/111-eap106.patch | 765 + target/linux/ipq807x/patches/112-pstore.patch | 147 + .../ipq807x/patches/200-bpf_backport.patch | 44780 ++++++++++++++++ toolchain/kernel-headers/Makefile | 8 + 61 files changed, 56685 insertions(+), 2 deletions(-) create mode 100644 package/boot/uboot-envtools/files/ipq807x create mode 100644 target/linux/ipq807x/109-logspam.patch create mode 100644 target/linux/ipq807x/Makefile create mode 100755 target/linux/ipq807x/base-files/etc/board.d/01_leds create mode 100755 
target/linux/ipq807x/base-files/etc/board.d/02_network create mode 100755 target/linux/ipq807x/base-files/etc/hotplug.d/firmware/10-ath11k-caldata create mode 100644 target/linux/ipq807x/base-files/etc/hotplug.d/net/macs create mode 100755 target/linux/ipq807x/base-files/etc/init.d/aq_phy create mode 100755 target/linux/ipq807x/base-files/etc/init.d/bootcount create mode 100755 target/linux/ipq807x/base-files/etc/init.d/wdt create mode 100644 target/linux/ipq807x/base-files/lib/firmware/AQR-G4_v5.4.B-AQR_CIG_WIFI_ID44715_VER1673.cld create mode 100755 target/linux/ipq807x/base-files/lib/upgrade/platform.sh create mode 100644 target/linux/ipq807x/config-4.4 create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-cig-wf188.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-cig-wf188n.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-edgecore-eap101.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-miwifi-ax1800.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-wallys-dr6018.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-eap102.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-eap106.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-ex227.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-ex447.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-sercomm-wallaby.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-wf194c.dts create mode 100644 target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-wf194c4.dts create mode 100755 target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq6018-miwifi-ax1800.dts create mode 100755 target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq6018-wallys-dr6018.dts create mode 100755 target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq807x-eap102.dts create mode 100644 target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq807x-wf194c4.dts create mode 100644 target/linux/ipq807x/image/Makefile create mode 100644 target/linux/ipq807x/image/ipq50xx.mk create mode 100644 target/linux/ipq807x/image/ipq60xx.mk create mode 100644 target/linux/ipq807x/image/ipq807x.mk create mode 100644 target/linux/ipq807x/ipq50xx/config-default create mode 100644 target/linux/ipq807x/ipq50xx/config-lowmem create mode 100644 target/linux/ipq807x/ipq50xx/target.mk create mode 100644 target/linux/ipq807x/ipq60xx/config-default create mode 100644 target/linux/ipq807x/ipq60xx/profiles/default.mk create mode 100644 target/linux/ipq807x/ipq60xx/target.mk create mode 100644 target/linux/ipq807x/ipq807x/config-default create mode 100644 target/linux/ipq807x/ipq807x/profiles/default.mk create mode 100644 target/linux/ipq807x/ipq807x/target.mk create mode 100644 target/linux/ipq807x/modules.mk create mode 100644 target/linux/ipq807x/patches/100-qrtr-ns.patch create mode 100644 target/linux/ipq807x/patches/101-squashfs.patch create mode 100644 target/linux/ipq807x/patches/102-cig-wf188.patch create mode 100644 target/linux/ipq807x/patches/103-sercomm-wallaby.patch create mode 100644 target/linux/ipq807x/patches/104-wf194c.patch create mode 100644 target/linux/ipq807x/patches/105-fix-dtc-gcc10-build.patch create mode 100644 target/linux/ipq807x/patches/106-eap101.patch create mode 100644 target/linux/ipq807x/patches/108-log-spam.patch create mode 
100644 target/linux/ipq807x/patches/109-tplink.patch create mode 100644 target/linux/ipq807x/patches/110-add-esmt-nand.patch create mode 100644 target/linux/ipq807x/patches/111-eap106.patch create mode 100644 target/linux/ipq807x/patches/112-pstore.patch create mode 100644 target/linux/ipq807x/patches/200-bpf_backport.patch diff --git a/config/Config-kernel.in b/config/Config-kernel.in index f71114b5da..4a85d83118 100644 --- a/config/Config-kernel.in +++ b/config/Config-kernel.in @@ -2,6 +2,15 @@ # # Copyright (C) 2006-2014 OpenWrt.org +config KERNEL_IPQ_MEM_PROFILE + int "Memory profile (in MB)" + range 0 1024 + default 512 + depends on TARGET_ipq807x + help + This option selects the memory profile to be used, which defines + the reserved memory configuration used in the device tree. + config KERNEL_BUILD_USER string "Custom Kernel Build User Name" default "builder" if BUILDBOT diff --git a/include/image.mk b/include/image.mk index 6fc02a3f6b..92b23321b1 100644 --- a/include/image.mk +++ b/include/image.mk @@ -179,6 +179,10 @@ define Image/pad-root-squashfs $(call Image/pad-to,$(KDIR)/root.squashfs,$(if $(1),$(1),$(ROOTFS_PARTSIZE))) endef +ifeq ($(CONFIG_IPQ_MEM_PROFILE),512) +DTC_CFLAGS = -D __IPQ_MEM_PROFILE_512_MB__ +endif + # $(1) source dts file # $(2) target dtb file # $(3) extra CPP flags @@ -188,7 +192,7 @@ define Image/BuildDTB -I$(DTS_DIR) \ -I$(DTS_DIR)/include \ -I$(LINUX_DIR)/include/ \ - -undef -D__DTS__ $(3) \ + -undef -D__DTS__ $(DTC_CFLAGS) $(3) \ -o $(2).tmp $(1) $(LINUX_DIR)/scripts/dtc/dtc -O dtb \ -i$(dir $(1)) $(DTC_FLAGS) $(4) \ diff --git a/include/kernel-version.mk b/include/kernel-version.mk index 3c109c13c8..dff01895be 100644 --- a/include/kernel-version.mk +++ b/include/kernel-version.mk @@ -11,7 +11,7 @@ LINUX_VERSION-4.14 = .193 LINUX_VERSION-5.4 = .142 LINUX_VERSION-5.10 = .27 -LINUX_KERNEL_HASH-4.4.60 = 2cd8df6f1ac6a5329c5a286ec9b5956215977221a1b731597ed169fff74a9659 +LINUX_KERNEL_HASH-4.4.60 = e7f2f47acf17497d6ffd713eda65c025b3df0bce09faa8c04712bf1b3cfc9fdb LINUX_KERNEL_HASH-4.14.193 = 0b0fb41d4430e1a42738b341cbfd2f41951aa5cd02acabbd53f076119c8b9f03 LINUX_KERNEL_HASH-5.4.142 = 99785728968564ba27c7e552d024b560072dcbc885540912eabb5c021e231451 LINUX_KERNEL_HASH-5.10.27 = d99dc9662951299c53a0a8d8c8d0a72a16ff861d20e927c0f9b14f63282d69d9 diff --git a/package/boot/uboot-envtools/files/ipq807x b/package/boot/uboot-envtools/files/ipq807x new file mode 100644 index 0000000000..6c429f1852 --- /dev/null +++ b/package/boot/uboot-envtools/files/ipq807x @@ -0,0 +1,37 @@ +[ -e /etc/config/ubootenv ] && exit 0 + +touch /etc/config/ubootenv + +. /lib/uboot-envtools.sh +. 
/lib/functions.sh + +board=$(board_name) + +ubootenv_mtdinfo () { + UBOOTENV_PART=$(cat /proc/mtd | grep APPSBLENV) + mtd_dev=$(echo $UBOOTENV_PART | awk '{print $1}' | sed 's/:$//') + mtd_size=$(echo $UBOOTENV_PART | awk '{print "0x"$2}') + mtd_erase=$(echo $UBOOTENV_PART | awk '{print "0x"$3}') + nor_flash=$(find /sys/bus/spi/devices/*/mtd -name ${mtd_dev}) + + if [ -n "$nor_flash" ]; then + ubootenv_size=$mtd_size + else + # size is fixed to 0x40000 in u-boot + ubootenv_size=0x40000 + fi + + sectors=$(( $ubootenv_size / $mtd_erase )) + echo /dev/$mtd_dev 0x0 $ubootenv_size $mtd_erase $sectors +} + +case "$board" in +*) + ubootenv_add_uci_config $(ubootenv_mtdinfo) + ;; +esac + +config_load ubootenv +config_foreach ubootenv_add_app_config ubootenv + +exit 0 diff --git a/target/linux/ipq40xx/base-files/etc/hotplug.d/firmware/11-ath10k-caldata b/target/linux/ipq40xx/base-files/etc/hotplug.d/firmware/11-ath10k-caldata index b12c9af9a7..b7ee73b12e 100644 --- a/target/linux/ipq40xx/base-files/etc/hotplug.d/firmware/11-ath10k-caldata +++ b/target/linux/ipq40xx/base-files/etc/hotplug.d/firmware/11-ath10k-caldata @@ -265,6 +265,11 @@ case "$FIRMWARE" in caldata_extract "ART" 0x5000 0x2f20 ath10k_patch_mac $(macaddr_add $(cat /sys/class/net/eth0/address) -1) ;; + tp-link,ap2220 |\ + tp-link,ec420-g1) + ath10kcal_extract "0:ART" 4096 12064 + ath10kcal_patch_mac_crc $(macaddr_add "$(get_tip mac_address)" 9) #2.4G, wlan1 + ;; esac ;; *) diff --git a/target/linux/ipq807x/109-logspam.patch b/target/linux/ipq807x/109-logspam.patch new file mode 100644 index 0000000000..79ee61dc77 --- /dev/null +++ b/target/linux/ipq807x/109-logspam.patch @@ -0,0 +1,24 @@ +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/clk/qcom/clk-branch.c +=================================================================== +--- linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce.orig/drivers/clk/qcom/clk-branch.c ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/clk/qcom/clk-branch.c +@@ -75,7 +75,7 @@ static int clk_branch_wait(const struct + bool (check_halt)(const struct clk_branch *, bool)) + { + bool voted = br->halt_check & BRANCH_VOTED; +- const char *name = clk_hw_get_name(&br->clkr.hw); ++ //const char *name = clk_hw_get_name(&br->clkr.hw); + + /* Skip checking halt bit if the clock is in hardware gated mode */ + if (clk_branch_in_hwcg_mode(br)) +@@ -93,8 +93,8 @@ static int clk_branch_wait(const struct + return 0; + udelay(1); + } +- WARN(1, "%s status stuck at 'o%s'", name, +- enabling ? "ff" : "n"); ++/* WARN(1, "%s status stuck at 'o%s'", name, ++ enabling ? 
"ff" : "n");*/ + return -EBUSY; + } + return 0; diff --git a/target/linux/ipq807x/Makefile b/target/linux/ipq807x/Makefile new file mode 100644 index 0000000000..abdb82ec5d --- /dev/null +++ b/target/linux/ipq807x/Makefile @@ -0,0 +1,22 @@ +include $(TOPDIR)/rules.mk + +ARCH:=arm +BOARD:=ipq807x +BOARDNAME:=Qualcomm Atheros AX +SUBTARGETS:=ipq807x ipq60xx ipq50xx +FEATURES:=squashfs ramdisk nand pcie usb +KERNELNAME:=Image dtbs +CPU_TYPE:=cortex-a7 + +KERNEL_PATCHVER:=4.4 +KERNEL_NAME_SUFFIX=-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016 + +include $(INCLUDE_DIR)/target.mk +DEFAULT_PACKAGES += kmod-qca-nss-dp kmod-qca-ssdk swconfig \ + kmod-qca-nss-drv \ + kmod-usb-phy-ipq807x kmod-usb-dwc3-of-simple \ + kmod-ath11k-ahb kmod-qrtr_mproc wpad \ + kmod-gpio-button-hotplug \ + qca-thermald-10.4 qca-ssdk-shell kmod-qca-nss-drv-bridge-mgr + +$(eval $(call BuildTarget)) diff --git a/target/linux/ipq807x/base-files/etc/board.d/01_leds b/target/linux/ipq807x/base-files/etc/board.d/01_leds new file mode 100755 index 0000000000..1f1797b0c6 --- /dev/null +++ b/target/linux/ipq807x/base-files/etc/board.d/01_leds @@ -0,0 +1,38 @@ +#!/bin/sh + +. /lib/functions/uci-defaults.sh + +board_config_update + +board=$(board_name) + +case "$board" in +sercomm,wallaby|\ +wallys,dr6018|\ +cig,wf188n|\ +cig,wf194c|\ +cig,wf194c4) + ucidef_set_led_netdev "wan" "WAN" "green:wan" "eth0" "tx rx link" + ucidef_set_led_wlan "wlan5g" "WLAN5G" "green:wifi5" "phy0tpt" + ucidef_set_led_wlan "wlan2g" "WLAN2G" "green:wifi2" "phy1tpt" + ;; +esac + +case "$board" in +edgecore,eap101) + ucidef_set_led_wlan "wlan5g" "WLAN5G" "green:wifi5" "phy0tpt" + ucidef_set_led_wlan "wlan2g" "WLAN2G" "green:wifi2" "phy1tpt" + ucidef_set_led_netdev "lan1" "lan1" "green:lan1" "eth1" + ucidef_set_led_netdev "lan2" "lan2" "green:lan2" "eth2" + ucidef_set_led_netdev "poe" "poe" "green:wan" "eth0" + ;; +edgecore,eap102) + ucidef_set_led_netdev "poe" "poe" "green:wan" "eth0" + ucidef_set_led_wlan "wlan5g" "WLAN5G" "green:wifi5" "phy0tpt" + ucidef_set_led_wlan "wlan2g" "WLAN2G" "green:wifi2" "phy1tpt" + ;; +esac + +board_config_flush + +exit 0 diff --git a/target/linux/ipq807x/base-files/etc/board.d/02_network b/target/linux/ipq807x/base-files/etc/board.d/02_network new file mode 100755 index 0000000000..9d7dd4e61e --- /dev/null +++ b/target/linux/ipq807x/base-files/etc/board.d/02_network @@ -0,0 +1,80 @@ +#!/bin/sh + +. /lib/functions.sh +. /lib/functions/uci-defaults.sh +. 
/lib/functions/system.sh + +qcom_setup_interfaces() +{ + local board="$1" + ucidef_add_switch "switch0" + ucidef_add_switch_attr "switch0" "enable" "false" + ucidef_add_switch_attr "switch0" "reset" "false" + + case $board in + cig,wf188|\ + tplink,ex227|\ + tplink,ex447) + ucidef_set_interface_wan "eth0" + ucidef_set_interface_lan "" + ;; + qcom,ipq6018-cp01|\ + qcom,ipq807x-hk01) + ucidef_set_interface_lan "eth0 eth1 eth2 eth3 eth4" + ucidef_set_interface_wan "eth5" + ;; + cig,wf194c|\ + cig,wf194c4|\ + qcom,ipq5018-mp03.3|\ + sercomm,wallaby) + ucidef_set_interface_lan "eth0" + ucidef_set_interface_wan "eth1" + ;; + edgecore,eap101) + ucidef_set_interface_lan "eth1 eth2" + ucidef_set_interface_wan "eth0" + ;; + edgecore,eap102|\ + edgecore,eap106|\ + wallys,dr6018|\ + cig,wf188n) + ucidef_set_interface_lan "eth1" + ucidef_set_interface_wan "eth0" + ;; + qcom,ipq807x-hk14) + ucidef_set_interface_lan "eth0 eth1 eth2 eth3" + ucidef_set_interface_wan "eth4" + ;; + esac +} + +qcom_setup_macs() +{ + local board="$1" + + case $board in + cig,wf194c|\ + cig,wf194c4) + mac=$(grep BaseMacAddress= /dev/mtd14 | cut -dx -f2) + wan_mac=$(macaddr_canonicalize $mac) + lan_mac=$(macaddr_add "$wan_mac" 1) + ucidef_set_network_device_mac eth0 $lan_mac + ucidef_set_network_device_mac eth1 $wan_mac + ;; + *) + wan_mac=$(cat /sys/class/net/eth0/address) + lan_mac=$(macaddr_add "$wan_mac" 1) + ;; + esac + [ -n "$lan_mac" ] && ucidef_set_interface_macaddr "lan" $lan_mac + [ -n "$wan_mac" ] && ucidef_set_interface_macaddr "wan" $wan_mac +} + +board_config_update +board=$(board_name) +ucidef_set_bridge_device bridge +qcom_setup_interfaces $board +qcom_setup_macs $board +board_config_flush + +exit 0 diff --git a/target/linux/ipq807x/base-files/etc/hotplug.d/firmware/10-ath11k-caldata b/target/linux/ipq807x/base-files/etc/hotplug.d/firmware/10-ath11k-caldata new file mode 100755 index 0000000000..1788908ab0 --- /dev/null +++ b/target/linux/ipq807x/base-files/etc/hotplug.d/firmware/10-ath11k-caldata @@ -0,0 +1,95 @@ +#!/bin/sh + +[ -e /lib/firmware/$FIRMWARE ] && exit 0 + +. /lib/functions.sh +. 
/lib/functions/system.sh + +ath11k_generate_macs() { + touch /lib/firmware/ath11k-macs + eth=$(cat /sys/class/net/eth0/address) + mac1=$(macaddr_add $eth 2) + mac2=$(macaddr_add $eth 10) + mac3=$(macaddr_add $eth 18) + echo -ne \\x${mac1//:/\\x} >> /lib/firmware/ath11k-macs + echo -ne \\x${mac2//:/\\x} >> /lib/firmware/ath11k-macs + echo -ne \\x${mac3//:/\\x} >> /lib/firmware/ath11k-macs +} + +caldata_die() { + echo "caldata: " "$*" + exit 1 +} + +caldata_extract() { + local part=$1 + local offset=$(($2)) + local count=$(($3)) + local mtd + + mtd=$(find_mtd_chardev $part) + [ -n "$mtd" ] || caldata_die "no mtd device found for partition $part" + + dd if=$mtd of=/lib/firmware/$FIRMWARE iflag=skip_bytes bs=$count skip=$offset count=1 2>/dev/null || \ + caldata_die "failed to extract calibration data from $mtd" +} + +board=$(board_name) + +case "$FIRMWARE" in +"ath11k/IPQ8074/hw2.0/caldata.bin") + case "$board" in + cig,wf194c|\ + cig,wf194c4|\ + edgecore,eap102 |\ + edgecore,eap106 |\ + qcom,ipq807x-hk01|\ + qcom,ipq807x-hk14|\ + tplink,ex227|\ + tplink,ex447|\ + sercomm,wallaby) + caldata_extract "0:ART" 0x1000 0x20000 + ;; + esac + ;; +"ath11k/IPQ6018/hw1.0/caldata.bin") + case "$board" in + cig,wf188|\ + cig,wf188n|\ + edgecore,eap101|\ + wallys,dr6018|\ + qcom,ipq6018-cp01|\ + xiaomi,ax1800) + caldata_extract "0:ART" 0x1000 0x20000 + ;; + esac + ;; +ath11k/QCN9074/hw1.0/caldata_1.bin) + case "$board" in + qcom,ipq807x-hk14) + caldata_extract "0:ART" 0x26800 0x20000 + ;; + esac + ;; +ath11k/QCN9074/hw1.0/caldata_2.bin) + case "$board" in + qcom,ipq807x-hk14|\ + qcom,ipq5018-mp03.3) + caldata_extract "0:ART" 0x4C000 0x20000 + ;; + esac + ;; +ath11k-macs) + case "$board" in + edgecore,eap101|\ + edgecore,eap102|\ + edgecore,eap106|\ + cig,wf188n) + ath11k_generate_macs + ;; + esac + ;; +*) + exit 1 + ;; +esac diff --git a/target/linux/ipq807x/base-files/etc/hotplug.d/net/macs b/target/linux/ipq807x/base-files/etc/hotplug.d/net/macs new file mode 100644 index 0000000000..13e95ec947 --- /dev/null +++ b/target/linux/ipq807x/base-files/etc/hotplug.d/net/macs @@ -0,0 +1,3 @@ +#!/bin/sh +mac=$(cat /etc/board.json | jsonfilter -e '@["network-device"]["'$DEVICENAME'"]'.macaddr) +[ -n "$mac" ] && ip link set $DEVICENAME address $mac diff --git a/target/linux/ipq807x/base-files/etc/init.d/aq_phy b/target/linux/ipq807x/base-files/etc/init.d/aq_phy new file mode 100755 index 0000000000..e64755b5d6 --- /dev/null +++ b/target/linux/ipq807x/base-files/etc/init.d/aq_phy @@ -0,0 +1,16 @@ +#!/bin/sh /etc/rc.common + +START=30 + +boot() { + . /lib/functions.sh + + case "$(board_name)" in + cig,wf194c|\ + cig,wf194c4) + aq-fw-download /lib/firmware/AQR-G4_v5.4.B-AQR_CIG_WIFI_ID44715_VER1673.cld miireg 0 > /dev/null + sleep 1 + ssdk_sh debug phy set 0 0x4004c441 0x8 + ;; + esac +} diff --git a/target/linux/ipq807x/base-files/etc/init.d/bootcount b/target/linux/ipq807x/base-files/etc/init.d/bootcount new file mode 100755 index 0000000000..a24f27353e --- /dev/null +++ b/target/linux/ipq807x/base-files/etc/init.d/bootcount @@ -0,0 +1,12 @@ +#!/bin/sh /etc/rc.common + +START=99 + +boot() { + case "$(board_name)" in + edgecore,eap101|\ + edgecore,eap102) + fw_setenv bootcount 0 + ;; + esac +} diff --git a/target/linux/ipq807x/base-files/etc/init.d/wdt b/target/linux/ipq807x/base-files/etc/init.d/wdt new file mode 100755 index 0000000000..75de36a347 --- /dev/null +++ b/target/linux/ipq807x/base-files/etc/init.d/wdt @@ -0,0 +1,14 @@ +#!/bin/sh /etc/rc.common + +START=10 + +boot() { + . 
/lib/functions.sh + + case "$(board_name)" in + cig,wf188|\ + cig,wf188n) + watchdog -t 5 /dev/watchdog1 + ;; + esac +} diff --git a/target/linux/ipq807x/base-files/lib/firmware/AQR-G4_v5.4.B-AQR_CIG_WIFI_ID44715_VER1673.cld b/target/linux/ipq807x/base-files/lib/firmware/AQR-G4_v5.4.B-AQR_CIG_WIFI_ID44715_VER1673.cld new file mode 100644 index 0000000000000000000000000000000000000000..4a5a76cd34f390a2f6de2911cf91a74c42774602 GIT binary patch literal 391170 [base85-encoded payload of the 391170-byte Aquantia AQR PHY firmware blob omitted; the encoded data is truncated in this copy of the patch]
zS&e^PLqYwEk1NldBAYXuHCx8B%ZgnEu1$~ERpMz=a^oxF5??EB4%=Vozdt?baCMmR zczknuMe16E9G}0DmXqD_C=3S7CU4=kTs{Ozt*C z>5rK?Qnc}W55qd68_SHNF*gKb5aaa6Om99O%JNt#;EIfX*pVAzoIZu~YqBGP$f2A* z6*V^d>Mu!8%ys^`j5k7r@l{Orkf}KO45y%97Ly%5sU|?ljKw2822AJCO~C6m{qa-u z($d9CKZ{p=@j_$ysqFCJoO8t`iN(d^j5)EGgVaNjERau0&FV{xf5!O6=CEr`33(_7 zytWo?!_YGAGYcI}SFlUUl!F$GT`Y3HH29Fed|0|LcK$#3QU^}Dp z$00@Q_JqTJ)g=l0JL>k|rM&Hy1-kEbo=}1g-VdoS@V9xakc4bKNcM8!=?i zyJJ?`Y)V7S<~3UX(_>I6pUXL88P-Dr!W41s@EQV6DjK8X__ zlJ;R7Wd{Qp`=L1eMu{@6R`HsI2tE)-432k@C!~=5#g9oBDP#TI>=h>aX00J^s-5=P zhoF>faf1dIGP^=AwvpM$ZrS1Z-^utACLP27tELz>x`h7?4!Qt<(qecCAH`5XHM;Gx z82eHjF3NE>9dsy&c@C1u*Sy?oO}Z!s{5{M1w2WqFnLJ6Hg~S7mDwN&b*O~ zoC|Vwarq#0*>^pYg7ae7&l82u{l(;ezxhq+@4BDD{^E7kHRe~GG=}f4`+g;Sfhmb@ zxN$AmDW08pq7z$RdnY={^t~WY;OoV6qj=J8eAj{(^<8!}w4!;93_^wW9yPQbz?8t# zEKBDnn6w%DEz3l95&3b5ab=9^2vw8&;udr^PuFJR_*o1qqNgcWf=XI5^>^{VLTCK5 zV!AU+DM4k-L^Fa3Ay*8WrBeqBe>7Q))VJ^g#nid5PCPjdIZ(vP*Csl9pEkn7Hmt+N z--IkI0DL)Svx{A$^EZmMx)|j=Hiqfv9N5l{BY7sb7%M_d`m1!_Qj9`W93u)79fq$P zuf$mIl49M(d{42|2K_e1us4gvP!sxXh+%IOyOq^nh+$pDqEUpl&wFiIDQ5#@E8h z2I$7K?n(xjxz{j6*Vp?nWYHkqtY6l7c*PnRKV2!b)K8%Mt<)U<3o;4=vye$=N)J22 zCOU?r^opLX2>X5*nq5_*Jmm|A&niZJKB2@|Yr#s3(eD^nqOb8`!iMb9675MJ#v!kb zjVw`CTd-S-u_i|;F;@C8VPz(l=ucQMmkn#Rqe`?DK8%eo5iwTsV@^%%7q_R>b5^p$ zv?og};omd#V)9~6&FvT0`7iz`)L$^HDq9{Stu7zmFRrB*g|L6#_R_-brK`eYUI-t- zww3a2rRCvrV|ZletEKy1E!`d7|KsprGMMgfqXtB`ir))hV zo9pvpq)D!fAc%O1>3@xZ-CP-gSVCRzGVY9VWejk)U2R3FDW${4srtAdTyK4c%1E5y zRXDkHykCLC=`1;#>*z9)e@JZU7&Zyrsg&fqY6A4%dJ-KSH;1E^68VM|zd~0RZl$b9 z=}Y&(8^4_Go{NSkc&eX`8;p%8EsN?iskN>?Re#j%_s+NJcSo>dD*9joJ`8F0& z+7!Hbtz(G&9Y1#D742M4_uSQfQt+_BVeFz(n)(S|H%$pR7t{ldaYiED&)M$R)tSQj zmQwXQ#5TRsc^fW7*!|*9%wEbDz5@Hrh&eWw`Iqu+a!il;Yz^xv;djAdOp8G;;r9~$ zS2&bu=pfYi%?N=soV3zCu_!Kd-J|SuiT2xB5uC{H`NPJBB>hs-tJa}%An+gm1+Vcx zbyG0^wxq71xG99x57F{*L$$E&sf>O}UzQ{~iaoU#>;2(^t1_Xt z^&`@a>hFF@6}{q`XRGn!dP(#QJu~VCzl_d^cQ6mpWH%aPr%$=%*`M8ZOM1B^?KQdG ze@g&vcAP%;+V#J{gXE13&f8FuJGOYd&=ahfqK!I3FC`K$MZv>}p|6p6vx!N2h(9+Fv6yUyT8 zJC4k`DQYSXuB0@f`JASgl{j~`Lm1k>hL_1h8{(~xqvMB-?UyvKWTM02j}D=?{fl%3 zs^QtHW2u@($Iz*nvbuDr>Kra@ScL=aie&ox7!^IFJbYgCsK)SE;uB5ojU~ntl)=JN{B&klmnegTlNY{>dMTwui4;zDCE7NuTh=Z%ofAs=RFQXD)IF=0=Nyk?RYQ>*>C>5kYVtFUf z@B@cyiv1n9mA*FDl)+K6qo&;Q{9)4sWG^9cw%i_Ey-Xc4RULUFB0prfx=BIghsf&O z;p#n#y3En_){m_D6`ajC5;M3EZdZ&jY7cAt-?1Bqe4ih=KS8ixeBv4`fBk*&Dax`4 z4-1=uqld2=GD#Y;`E507h&pPJdRv6LC`|pwML4f6s-OB|kkqz23ia9Md%9nw@bn|T zK)w;5anHdtUYW*qGD*BcimD!|Lh+G9)oDZ2xr5YuBGhGJ>Kc+eq@TKp(C$cdtl4+tGsC6^*!Y8pVBM<}LH(!DM=lON)W_si#)a=m-|Z24AYB<7zV6|7 zg2+=_IPgH~#~1gd4EGKOCUSNHsp*H+^>3P!yP_Qx`BA}oapzkuxiuO6b_G)TQGLVX}i%_rjw?5D0L;|wI@eAfqO$fC^6R^ADgCZUA&iK9FMWR_Cg zQhdBMtj1426|>|TSK(AwiZd)d%|+3o3FVnL6=Ym&+cX@lPn&vjtoM&z>Ja5b~&40!U(VC0v` zi9za+Yt@lc)QPF;w5!y)lhk|0tIIA^*CbQM9HVX;Nf}d7D=)#BxtIVswmYhi^!vwA z{@JY(%4uzA5UOE+ttFI1cSd@%m9{h#sUBi12^BSmJ8|@7G7erg>ngIN9ts`WT2uru z(xFa_Ri{O(b1zZviBy*jR53#i6++ZaeMtq86FV=U=po;}V@JX&p9|UTc2Cml$zi+B zp`M>Fa+(p0JWOeHv>(zaL5+~rg#VO#C-$kaYm<~ag3%Da(o?+6S>spNO6j5;idk}) z6@I}eRSq$4s?74G$~ciK8QxS0D43YveG?M&)0oXSQK}^Vuc_j#m;N8d-aRg=;{PAt zU0{LTL0FI#c9+YdXH`TKwDS5+)G$#iOfh|DZ!yoBrg^D%7sKLWh+GsF01MHufG%FrLhG2C6{4^T$bO%57OcMC&*$;`{jsxi=K7k~%)I6`uh(m4SbNZP zk=E&JVT|FmKqS}vbRQf=(AKki$2!-2UWfMp%=Gy!ecWD=*PytBRg)v%qvU+P=23E^ z)F%RVFFs=pABJKWJK-UsvEFpy1&NyJLl2E!jzMYdztpvlO*V(o(iWgJF+SOg1QBQR zQ^_fKd=TRWq##FpvOpaCT|vih7&HNf(7(tfrIo}`N_>4~Kw_mOo-n4(1xa#G3|*t@$IURxw9|Kz6wg4b&S@=pY?s zeh*4a;<6LCw6vlkle?tQImu131B}DK*oMJPVmz0<<$sO89+dcBj<^>(r=<<#6zBGO ztUW-_uR+TlvwH|;*Du6&=sSb&7y~oP@GxUo9AaRD4FA=k#lFJ~yN9C1zMkYFNXi^v 
z*VlcW4Ik5)9;y`C`-kr}7tQf?dEHlLMDTYPA7@pnt%||%YAy|CJXMV@7_I{6Wu3!q zRaAXUNmiw7r2%lJ#Es%G8yqM zO)B9l?n7D$NXnzbd$yt#oBwsjdk@u55wVSZL|>XZDEeBS!Q~!o_>RY`>R?3ntsk z>dY-rR2_EUV#cvxcBhxo#l6y1@WQ&HzA%IWJ(**(yrwIRBgIulSO=hC@xE4U(3)() zXK+bz#b!i_C8dVL9okq>e1NMO?u--jcrYynd|#ic+*;~GX9m6?k7=a~xGR~qG!d)t za>ZcOv`bEGix|}uc`or4RmYhT|U#ZnB;nEbg3@Iip%K z@P)OCX;x*tMvFfqXJtOdk+kEm!muw$npcDN{Nd&35@+TeuhdPW;u1oF|IR~1G~Di< zr3gQ19^A0#A_n#$;CJ5fDtDn%hdpL*aw+#csiCWy6aNTprn4aPDQ5%`*=!yUZJ2 z;yliGZ($&t`gw6Ge7_-bFT z=2ew0?02*?=pu{+e;j~ghBdq%dYP! z8L5$(^($!e2k92iIcRi|v%Zt&_#X+iu*G}MGAFuh8ixBCCeW_Jq-FiR_0i@~N>7o| z+d(DS^CRdD29Afx2kK^vK{+Y!=Bz)DIt-yh4O3LKCW0jai+A5ix_FD_LryLe{6(5e zVT_Y#EY^wU`U&RNa5H)TN~BX;q^u!QS{*5|C?w2;yPf;(HqE~&JVGuii5X@lLk*g7 zhGk<6YeyNjFm$~RHWUTW_1e!6>RS@33oX@#E=vZDrP}KeoHB%xem2TG{_e6I5blJu z@e+SW#9z7s#~wr28ASPWcfD$`)p{Q7d4NkHLyY$}getP#&3bpUnZCfIcQZSBLQF?N zg;~>bg+UW;ST@eEc8p;Q-K=Sb8;a;=O`|o0`jsrBybH8Qdi#vi!6H!|FFs4ojV-1M z9p+yF>Yn>F92$mk-e991l+ADZ|5-Mp8!Vfh+2 z7Y9bx?-TDfPZ}{fA;N3q6V8=VW^v$1ADKF>KX`u~agXm5xdBHX8fL3#X71!ZI^_9# z-eX~A&3#V4bi!SXCZ-P#e-Fz_Y}$R|vSgu8rAw`pw|l>+`aV{|y|Rz_u78cwyb0RH z#>X#5{Z8fe>t&lh+DCHd_-G9|e&pjNK0bzLTnsPBjIa2t7;fH3r6;(-deHm2NRtLv zx#m}Dv7Kb=aF^;O7wgV_XPl|x5$xL+2tnEZOvitUp^bFg9;9PWAOz_Q!$ba=ROoXg zee*$5rXL}6uE8J}9M1K8A6L>->LW9L_!1ond>~ytJ`&e>(6&BL8VJvI&t`~L;MMUe zFUV8;z&H|cx1gFQJP-9y4=(-nV7lxS0G;Fzck%ODB7E!_i{ZA14=UF1?4Ey`)G zdlG-*HgC3hNAeEz&5~U4sxwDAznG`@tvun?_#?g#z*FgXEsJ>bxcrgT%6VMmI`Te zB@MeJ!c)POu2U9Ina!5^Ul5i<1!F@W+Y`vlWI_u7+*S zhFmI`o2`nTmGN%s2(aQwev%zhJhqB=m!&3u$%Wm3PD-v9>FEzk)Q)qm4gCP!hLmq|={L*Owj zgHns#+mG(08`*CAfsk=zXp@-cIZ3scrusRm_$uj#G(|ek%HOeHlb&B`I=A;}D75LlT7p2=0+CZSz~;)w-66MjkfZN_gUehcxN ziC+YMA^3UYC!?T({&>*jcN=Rs=r!@tL>$dnPwZT=*YGiv#0`e zChQDV62vFPew8gIOj_fE(&#*W33m@4*vnjdP!{xg`}dMDf=&02`u6P=1ff(0n6^+HAZ$Gc&whw*?=Wv@5x6Sd%oI`Tt7ZY+@_p%oUfJYb? z57_C|NBnm*#~Y6FzkN_X!d~5L{qJ7fC;N>K`o2vl@*VJ}Z8{vP+ql>IGGZ8fxNz$o zF-+^<^S-!O6cGwvi-D*Qb|`o4+zSrCpr8B-M(ON*yCK34miUI5k%o}DxHrKWkBc3e z2|i2KD3y@u)0|0iSQ58*FE#UIzC_gyq9k;IRl_9M*%Nk_t`ofS&r)LDDX;N2L2$?Cfodc=o|h0p%#$NK2PY_Gil zHPLIYX>I~`=5>$2dsjZSF4 zL_gcsBp9M)Q1-%a8J*Z(K?9-YXFBiH%2;=>_K>&Xz)X_a9`_T)jLDEjGzl~7W<=*K zci!&(jq4i)|*hF5XJ3$0#-FxmJ}SAVp~+%j>- zB~{%CFT5~@_||cSL|GjpYgk`ZK+|(W3r%aYvG(`g4Oh~FB8Dfj{ddCLHh$;@3s|%G ziesfL+ZoxC0Hseq(kwKlaV>l3Y`f&lmg%^!2K;;}9kb1QsN`HYpo=UnM&jy*eq{p| zDn-NyYts96*tXV{Clj}-9mxq5Dxx$`2nMz+m3H$4uY?J~mWKNF>#WtoZ#Te$!!gNSUi=mpWp8ld_1}C7#=ta-dN#9lr6`TtHiH1ymCmt z8t@d+F;!iK7g7Kg%PK4T9VQCVXg`!_XFxP4f1mO)qJ7VvJ%agfG)G^&r$AOhA4yAf zx4kICA(aBGII-vZ2WSP?QINDhICxYtU*R$x8%4Ilr4}+G<^z{O8W<^I<^7m?i3bYO zD)>V#!4*m73Ufz=ikU_WLA?u{jNYz7>)qvwC=6g1_Gw$jvU!Wi#PPH1V2hOa)}HZ? 
zo6}~`gt{2~8q$MSyuQa_?vq7n#jAU)Q;MLP&Z3ha@z!;P(7P`yWOM%-x7@lt*3S!Z ziY`Y--wIx{XF&fQVs^|A&bY@qrU*19TX#rffYd%T)G#H?5DgZ7ykTvGVar5A+9Q-d zcIQSH@)P?h)eqT^aT;RYN*(alA?7CPz*Z`R3!}~q>SR&p&H!D6NfS4JkC65^xSrTU z^&tEX9q9}X9q*W{Q?ZVWE@V^x)(_T4X`4nAGTZ;kYh73Xn}f-Fgm?c618yGPWA!h@ z%joY6(n6~*KDqIG*yMkt8?(pydm-%g;~%8Ej!(|8M@aZbI{h9|Da~vdOy|&1>-Mm3 z|CNs0S|X~97$}u3%0*%fRd8oYV6ye5u2G7c0^w!f)U~SUBOnc98{+@$=%Y4Co$DJ& zV7o}f8rC;B>N0w}b9FT9W?+Q-6u!T32)rIGp(BiWUPPCu?{+DH$%(K5%L5)v`>14x zl)XG03q<7*Y9lsRGyv1FPf#mCtTsZ7=n-Baj&C-?5iyMq8Sn_329mPtMb8;2y^e%A z(@%}jDjhK5nisAJkkQqPU##N_2K>Sz9W&LLt15tjfnd~e?-x+(11#9j=tvI?9Pyl$ zf_1c1Jvptg+eDc^8h@fGUIwr)>1i zI3-_Xhk?{KL@`JEkSOa8PQGcJ`Oy3L_^ppY*$U=iIBcj7w-G?H%SAxg{@bYGEb0=z zBP3M#n%25F0O^2R1>X+`&#fYMhXGXH&dwIRltw4`Sc ziMt_44=(^9HnNZJ>t@I4xV^>Jjrsf%9amC}K4Rc0D#m={V|849F>u7d^Jo#K8Ve*o zTr^(^1acFK!jvrj!;5gJ@fjWW5Oy#?AUC#%%o7&sNGQL}8`bRq-k2=34R}2Uj(`aV z8ierxm}NXLWVna(h3#0P_&RFM=0hX>@1pxCO=bSA$8;Csz;u-G?f3azPC`CR0L-B| z{LuTrXcz5(njm3BFS3s8Wpp-XoQ!`&#@#+DhiJke@~8A%!pY(X=Muc~ z+3^lq5Qpdetj;Aw$9yuJ^i*@b@qJ*Ht`DK(g%IXvco0Z%1BKUsk9`h442zlKpYS*o zCv;XYuHKHGa`f98)j+>7FRB2avp1@_S&R}Fd%}3&MzM|v&-*W|N9O6$O4$?GIzwre zE^cre%!!Q26Z_H}ffmdvAz~m?8F%`pWr$eKDgA}w@E?kCw845IPk2-p(&qK3sRL^P z+N0pF*hoN<`Q%8L_IYq^#nx~0V1Ta2WQde~N*{&|c)D zhh(VPG)cze%uY!P3{pQ1YJLQR?&Bb?p%?>lRWDah#~1UEj{B63FCJs?crlsBj6-3< z^B%mqFQbAxf;tj}Fr8c`;Xa{$eyooBuz0?bAEV=}#bHWjjE?RjWd zGw)|_0)t(ky2G3Pv8O%hMOQc3@9f;FVkQ*Iq)jgr>mKQJl`MB%9W=Dkx!OenNy@R| zI9Ji&BE`O}o`94(58aWw3)QtdJo^<*?RbC}Ll?>VMhvW?v{yqTndd9WV30(IPa>k`r2L0Yv zr%E~NGemHb2s7?7!-oi=Qf@jrMIbr^7&R^ya}j^pEucf06MN)$2ReieK~cvj*>Dbs zDjz%(9S%tzGI8#O82I?#USwtR1f`C9x5&COkA)|>$hsm=Q0TbCBI}p=3_R}?S><`W zyN=shWYy#`@C2h2-i=OhN{3e37eLS9k#zc=`V9|urQOMUI&bsvWHwZ)JX$q8nSV%X)X136bjC%b(uMv% z{8+pNt2pYfbP*bLwHOcpYyn>(H4ODJj0iHU(itZD8)gqTF7jcPLiU6uz#EGk6H4i{ z9#Vz2FM4$uEn&&Hh^4`7Z^P0x#QIYa@vJ&*wiHXMi>1QwV9eu79s~5bOjhGf+fD+7 zRX706``$&79(0k>d1DlgK)H;P)^O8<`>IGB%vZEV4e-NEbkKNF4`JC)Z*&sNVON0q zvJrRLa2+3OH%`GqhR0m#(!5vN2y>)>PeMVE5I7Y>eh4-@?aeJ-CiM zVJikBtMS0)Po=vfPC|&tDE$o~GMuxwRu)w{HT)n;Du(G?`vU`a`Z!_(7P{G3O({jF z@-Rk>s(-J@YC&?7n6^+==rD#ye=8ceMQo&)c9Br&0WgLEF-$W>48{wHz)l-BcB`OX zppz3e>W@k}6BbG`-*loMysAgmxn#Cki}5DXQ8JeQQi~Dtr;Xb}NfUcX%e_HKlW5`96- z*Y|=VvUC9#&T1(+T}l1?IW4yWi#h+jmQLWc_qmcn>&v^Dvs!Li0geA&%S|bmuVnB) zsUS?rf2ZXhDZt@F{7)>v^!QfGMHGN)f2)PGehA5U0TgH+kxS8jA)4|g|FJW(0S|xO zLzt;WMM0P$i@>x}TutG}-$(FG8W$6^K*7@&f7$Q*Dj^3E&i6HZ-j9K! 
zs5_X9-5a0cWQ=g(W)w9#ON=vJxW|exIj%JHbN?zL9_^Jm&0DzALT2TCy6C9ypyqpE zdqDsGxXnERG*NU-1UC7TT6Eb=%E~F37jL8Ily}7ic{6Oputd(|BOz}_&rO4z_&BCD zc}W-wLpn%@Sbve5I**}NsDlLqhSqO=5K>RThnL=CH|eRB3IB!79-8}Ru&zQl1H&7c zuvj!cbBqn4gK)p(Qv}zcVFVAP1&V)(fpl6?UU{-!$khTJeIVn+NB8h1u8>64=;!<( ziyAGdbY?#RO3J0=_$e5uu(rU~j>0w%iT=;RB>@XS%0YDrNI8tA|MsyVut6r(hAb($ z)GCOne@5ycq(#atKOlRhvm{`K4;-++X_4!_-G` z7(3A6VOGJ@`a)r?>xHv`4vQ0@YX)?uRP4X|Yh5d$Rp(dq&ohq9tD%~a{SMsWKAX=# z_rV1SwA++S+Th+xe^d--cEETM3**lsvN8VjAB*Aq_I}8Zr_e?GW6%-)FeN6(u|)Gn zD1(2*#>ZiRVkouKa<%N`K(d8Rf#_T8Q>U0+g}RAJE|Re>bNq(lOw&Lg@cSKTpM&KT z+mcJfsZC51@V7L~A_vm`N`ozrR#+?^vbxRf1bD*M{{Jbb5dF`b(2*Vr{J#^@Rh=#V z{{}-i_Rlns_KEW|azmkdvpcuD!1`h?zgx@Y6jXzm%GGk&1(Z<1^CXxcAqPX5xkw5A z3CaUWgSatth_fawH##31hHNc2G9T0k|4cqM3|U$(5=<3~e||=c*_uKpeqoUM86@;;V^FNyflGvCttkW;cVt z4TSTXwAj@yDzF-Iaf9(j+$e$a@sf7pn8E~V*Gz>-vgVoG_5wz8e{d{f3g5TaDhxhg zUbeEDAmUdk3ogrHjw^`wnveNj8W-|SptE|#5jTHw32r2v;Bob#ag z9?R9`(k+&-L`#MlyrmaSRIel4>?%;*l~O(Sc5a7W5;Q~dD|bEIFm#wP#6>u$O>vTx zNo!QiGnqmy)(Lj6mh0SYEy-bw+J=UHlowieFo*ZHHLp^(scV<3la`a7447^)=e1an zNyf^!04{U4u$bOQ>5B7?ByjxO$wu!X8LLSVbD)bZe_MFIi+hhs7f1#Py5zd?r<}I9 zAa;S4+XqbGx<`EwKVQr3Eud#&$pT@XR%rrLpvWpq7R+5-VL^QQ#)HY0IB+$r&}Gr% zic9A+@=LPcomdY2M93jhH2zQy!*`)E%`4Fb?}_zJ5%zZp$7!#P*76f{mN>oosGG(7 zDSs8YV!3Sv)>S#Uo-#m~t>uymtk31(rOp~h8lfy=WD=Dx(MZXs0uru)RCiGg|h63x8v{(=JF>oUV)_LLw|1WL)AMuw~{FuY7 zE3kIv;}JC~9I5 zi#SBQ-evSx!f7|^x-y(!c9gW*Vd4nmh-{qH<6-h)ev@lSJ)KYOU!vqSTs4tpVNdb zt1fL}@V~a&Y~jyp!nRafA{hLq;aC*^y(SE7*r&|*nnqvA#g)2HVO3XskI53_RgY4+ULe+7vx-Xr*n(@*sMHI_+O`EbgW@rtL z!6i6?P=!vz{--O9SYp&QR-MKXEUfIJxbg6rrUCJ2(_GQ(Gx{y&^&sqQtHxq}8APH* zL(=#&n)nP$j9*PXNdub5`~jH0)`0qcsj2tMwZv#@TFF4MNIq?u^>#jk|Fzc6e7;#T zZHx6*K7;=>Yezo+rG`vt{WE`ajIQQ#{U7-v@+suZX^m2deR=q_zQ&y(((Jx2a>M#- zKHr31wO-9qnexHM_|bLy zg|6mbFe1vrNgvK%w;W1l$9Y^lq3hufYA}{8F^|9!A##1SUaRXFz;A`d=p8`E&HG$p zK2=5Klb4|InTzJ}(CC>!EJ*!=EleaIf=&Cu>2WGS;G4PVwjckkq6S>sHJ2`B8Lv%air zhzkT7%k+(@{n+ru1@bYNJXb!b+Z+=-?S6wkaC1yx&8?Q^7uxh*?Vu5Li>i(Xw_oyV zxfpzTW3IVbg+P2F5N*O#Y>4v`_GqR>SF3{AJ(xkOrj5H2Ir~cF6IUYVUx|GDO61%t zkqfRwK7A!Jrdl+*rntby2K)RWCqH#H;T7>IjYsWao+m$(?-@cu8xPx?pM!C;BIRTI z9#5ia{Lo=vMbg-jz!e?kf>$ti*@V@Nxvk-TGSMX%EW=NjB&->%0Q+1w2D-wA@p(`d z*_W5(EYVGP)bF9O&K3-LrKDmwUms9ScO#D$-)wM}JgOV|R}^b~5Jf8!K83U!e;)*J zUAwI(v)SJ@-0j_z1HGx?I(AbI6rMkJ0~r1{4YxBFJSPKBM(%tie?!Bi<%TI4{HKC} z;IC`Alw1hz@V_k=o4jinqwH{q+4w(usjQ#5-HR#Gi<3_X3h5Lpe#*|Q!CV&LXoQQM zxIE$wD#5=5ANzVA`^LbY4Ymd+@~n^Ku!4qb$g?)&#SbC(+ly3??lif#XHX@|4tIwd z_!QM^?r^tU@H$*{@m38L2$>$**1m(LN!qZj#c@y4hHZ{dxD5|&ZwIM|I(Nf%M-dGh zwmUxIS{{10y@7kVNYc%%ChLo1nU>dR^#qr51Y+O;E_4#b( zf`%)|vu@1h&uh5dc@%^28UIIGdeX4AbblNv<#wtRXJmzihnGqK-; zNb^7FnQPchvxO?Wb?w*_3FG>W(Zj9bc@UEgH3F1=SI`b&0{|LE1fkeZ;hC|PQ@qPv)lf8#ioIs|NY-37?h*xP+P-c$=5A~V za=PX25-h`s(!5Anp81L7vZ2hL?y?cg-tIDIHoLoQwgBHmeqT51qXAcRNGyk$Ld%I} zgFE?I-KGoOScF4qWg37C8PdNnD+LRkpYZjt`#Z%=^zZgydfrWxL1>mr1v3%|Q|Sce z=mehH$4?GyPJ;NxHPEpIF$m?|ChiW-8+_htG54tNdR2ekqb~MRHmK)&Wqkwjyr=o+ zI`vLZ;tIxcYT)a>(4}H4$&SZniZAQ#G;g)#cp6s`^uZwj#~kPei=2eJH{E&+2niR8xUl`rKllG4H2=1_nxtQ-m6aYr4WqqZP*`NccM;x z$h(cZ(*i(hy&;uv2P|KClhH)mY3}K`U8nXOnq+QDNUl?l8fxNZWaZSUcX(OMX7m0! 
zb&qGIcrJGe8{Z8Xd{@98g<;XdhE);Ofa{SYw@Cd8&JwlFQv~E~i&h(*dS2E^;-FhO`P>!s^Dp?tgnE?%dr@ zZ@G^cE+%jiGSMf(&Q5Sj?zqgONm{FDXzs5&k_@Xj7)BZ1i!w%F7cX~`7#2NZST#ZU zeM?bttvhJ%cJvh=jBaI8X#WMF|Cnu^AR1fPyzZz&b}V2&VO2}P?Ndr>81)j)pAtpm z1vv80?Ibkn9JHM&^>(s03hJpqOJkq;{hX9qdnFD*1}J;F9~*65wY#cSMbx1!DWC5AAI$X7FBQYUodVdN)yK)g)u1izQW7^wHA?XH|uM7|=myhCsUO=kB)7-EBUe++55( zu^VbH;!f?o6lau>IbSUzZ6(q%&5u?(YbzRi?G=S7f7q>$?Uvu6G|<|;3kTz`o7nXl zSbWMb`nv5RaYEd58F$lU{R0Wumo(O!_+Oylel_uH9Z5rRE7BR~u@HG<#ND=w7Bp3M z=wPZL@}aa1;Q-X9~F17cg=m1yTk=0otDnDM<4w2+F1_5X-apZ{81qC_*Qwqx+A)GAX=yHnO67 zgjLIfP&3f62Z|aNzZe!&n@Om5_&!g7fPO!HH@2ewzV?DE;O>UG(CWRAUDoWy= z0}J8~*rdd_dGh3NwTn%ZFG&N-;xdkwMb^Zek+DCCNT(Lwz?;y}TMHV<9t$9D64?xM zmNhHOIV(A5x*JX=a@%$_nBT6bsj3P;9Ht^J+QqQ6t@x0B9_j>S#O1(mM7el;xW3{< zU0wLqo1Ns~l7ou2FS?g2B_1=zkGB%JZpc;EnXut?xljL5S1~~nh~RL^huq;%#Z|Q{ z4#sJXQGNCmi|c?h9^||P2Aah<&^gW(ty0d`F3$Ltq%JADld%fbC}+pMYNxh_%%N^- zP7kVG3xj1w3Z4d?z((D*Lw3u>0_=VVoMA*e`)@!W=q1Y858&^VB9+dK3yJg1 zzOZYo6WA1s|7lB%1Rk8V8LM3LpKD6}k_9ea(RbM0DOr{1feHMKsg z&^&O>puMN3Gfvg~Jobp5jGv@Z&L{fUKb6}A3p;aj9ro1+lglr&rT2+P_T1dm%bj|x z$W^Ism!HT@u-7F<<=(rMX7Bi8R>!4T%ASrNlPf*W$M6>dEOA4PkINdzNsNnSQDY_Q z@7xW(^9_uP<%|2V7r^a8-eX;xQ$H#9u*ThkdEK$3rf}vb`x7xyf!L$-k7-+%=3E|`dsY+lt44V%G&k|t z9GgbAy(o|ym76^(w@gvsUZ^V9qdbUA>zUjdWfM82KcZ^VKPXSdT(HZ3!`=J{VVd#J ze;>fK1uRgc;0zuO6!5r)ocS(5XhC3`e&~7EvRl@nx#sukj#jV_Ig!AUZiCv_U{Dwz z(S;tZj&zc{NJ39HM9Q#;gkETil%gL)dpaW}^Ilg@&>ArUV)y@K5sja-BX?Pro=#Fp zDNDTcO9$Ocyq{Ak8(%OhzYJ7{PD$kf9AQ`1-^i&uR#l^ASNl77iSR2qP%Pg8y-&QA z6Kta0StuQeAxSn}%pvkpj6Y@FzhRhB<8&y!QYm~6@QL>6V{IkMGAUno4YE8}hFoI} z*JvdQw1@dD02Yig{ur)#pmOgoM0yh5ltZQA%ohRWPTG)p6O1!)GX&@i-KDTul=1#g zw@`Eog2*28fnauvKRBO5w@A;lUS88nWHHS-#Cfg@!4zi$a9~O@bB6g?VpY||stMzb z)z^s(4`VgC%$qIwhQq$%l*@t7zX9PUFKwyFV@4j!8Ra}aLNz{O{P-B*=?UYj zo>9S_>^yMihW?|RR9SNYY%R@M?-k0_a|hOt@b`0`aGEhHa>laAj@hcGL&oF5ff*se zIBhywJoZ3&vgX_3wre#^1_VFtbSean?=sHxi*8vZ z9)tHxLx{H_!qc$udBe&$!_1cn4kUy1fL6>czSEkWlf5hFhzk4M9#k%q@78QvXDp=kWsp8YENO-$L2 zXY=AaPr5vm|>0QmIE1lWR zy@pT%4ZnVM&S)gWo$q=!N4y6HYro4*+#U3SiT~Z-94zYC#C^;2run!mNVxi-@8~DD z)WrPe|Ce1XRc&yKOv3entaQDnLVxZl6MyY*Vb}*nn`ne^`9XOIi!xEWKEC~LVTmvA z#3bTonnX=S$g|%IKhju~8~!4V33aK#$}9rvd_w?(Lu8`an{Fb6P_fopC}CR5VQtdc7d|7*igX13V6LlxHVOyGFSM7@iKDeh*Up$XQA30@WxeD${kd@lt9*wZkNP>VR! 
z*S*p`ywNnC>b}4(Q2r-TBJ|tn;T+-103_UE6SsfQcd0|My%n}o)F;D1ep*2nvpb>mav(Y@Oc1V zXFH&|*YSc%z1f*^!)KhBQeHp(Gs*mEcgZAV?-*{U1v zsed493u!=}`MB>a6By@t?v+6dL z37v3Hhvz;jM$vcp(jRL;Cty7gO>8T86~%=+%(WOoM}h*5fH9$9GakSx&t}Bqn{^e5}Ju_xU~hy*u(bkmr1ys3|`;My_YRcKMyc8$VMw?Jz#l;`p~GOtXRZ7+Ht*zVtT00 z6rN}v{465te}Hs86De@k7F1f zrD)kAe1>QN!y5Jx)InG%OZ!atmgb`)PCyA7Rvh58|5@bFP&9+O(V9NAMb$zwz}irx zX!wk?kjM$}LPbO~eY6GrHgq3+3LfUA3gI%~rk$n`?!sYD+rS_;qBI^wuI2F=&Eqqg zKZ#gzT=5v+5T#>09_0bg78Wxsln!mjtfOI}0`i5jm|USe?I28BG=q+BI%E~XhZGJm z0uy8kMtamPJCg3UMcsk^fc*_960XyPXtV}{7jYOuVoMZECxxG)oW}^Fh{o`{Kz%pi z(8-a;QsnSJ(^?*%(R8jv18USRQiKmg_^T-VXjv#*Mjdg=(fLlpGy?L4vY2h5ltO4& znpqHM)_&^4)YLGDP_T63^2N5O|3K=)@32L|5^XKBlLlh^X@oTDY@sv^s|7N?0S7CH z?gkT2i+z>09zKPzfjS%EurI^m*HPa~G;9s^y-351^z#MktO8uZTXc3I?-q6|jeQdi zgS^X_X8O6DI?q$*KQscX=`vw2r3dd(8piCW^k6q_MYc`e3!F0;V->XP6tv4V0t(Tv zm};Sv7Apkcw}?FtA6rk8d`h#^K`Qnx19YrKk%;9eJot=e@fpp&hcHx&hyvi;i7iAW zl;CYawdhV0jmf1B!Za8L2s;3Wt)`IC%s?qhNT2ON)7hIxhg{ZwM5Pd>kC$O?QLhZ3+X-Xjqm;JWHLWaF``<*u~Vhh=whsz8D(z z6#bk}odtl4M*&-C-YpF640a_PA(o={Bl;OdojKHbf<~a{mhszdVWF_dvG7l$6%bUT zuP7HG-wT{ml73ViS}Z0+H2*PjF(@gN2a+|Es1?x(r(h~*nWdDzV1R1q5LXDh=o|rJ zrZ9AjF~H7RCWAVev?I6EfsiQDmn2le6aZ#ai|*V)3#224hDEc;hf^VlV2Y^^B{`-Mm1JIpLrGbpzADe)8 zcT$}n8w@s|c*}7C1M(sGn{@F;JRwlr9Dqln<~Zr~U$!zs1Gt%4)f^sXKc^CO{*X-5 z*Wk|S8ThAXS*K@_b?3ratv@$8iz<`PNqLQ<4DhuR9T`#TwB7{@=pMqem_UE)_$=7g zuH(jLF#-PMt+6y>bQY@vcu;{j^Fs#tAnw(j4i1d9W8dTOd+_}*FSgx03WPjoI$Y#E`VHVIbJj(sur++g%Q9UCu}1=Mq~17-1db?6SU zEdDJU7c-EUvFU*L|EH|}UDlD{?RYPLD zj#e>tX-AT3tYUl74+Z){9Bia*iIXUkyGL!vw(OiUd5yI&ZP|OJQ{&W(yj;b|1J+}~ z>gI>oVBW4_J#Ac8`kT^<_t@I*1uh-0sFbq}&i`tjh$-Y@>oyLly5UrJ!>OvdZuK-# zU@K?!8sT)edgXnCkE?OGYq<>j4_qZ1IwS7zDsqhr#tcq0U8`HaI*@{i8n_r(C zjcX-@`tIda7+cr9RLb7fEm!cLbaQd(>zykse06uia8>Z|V5QL@H*~W@%r~48ZaA4! z>y}Is0dNs~H4vKAbvJhZ1*e!S3&{%WmP~degMx~w>bAP{_G2QbQS4z6PT^y~Df|vNA&%u972#yc zx+#QvAQKw9kugH0LgsPJ!S%w1{9qTlqfX90o2*rzLUrFYWI5g<@?Sv|`=tS7>(!inP})d8}K0OWfDQ#O_*vODp0VELzuUts9K4 zOLN@nzuRFxm{odw6m&J`T)E8?Q9prn(QVIF0V4_)C5|C8)rBE@@DaoaPgI3&2$S!nSK zZgb|6Gii-Xi=Vjv?%wPkK4jOxepo!<>!1@f+dj(5P)o#YrIfJ`R@)5?|LfqattZBoSrKSxL_7 zG#8u-N>e}UO`Kp2q(0`>aQeQwPq7C=C)mDN%;`86O|?=PlQGwPBqgTHKU3Mnt)F+c z1txUdr_zef=M&(zJ)a<)_vC&QX_#;l?Nv9qaX;)7r9+K=u*N_q9OBOIWE%YnCf1BF zAIUi~RGmyxS5+$8-EcF5-Q4mj==0Y*h0pv(ONoEukM?0}R&uAs68Hu`b((YPsaJS2}^*QUPu&&b-H>@l&RlxF)_r^Y_J#hfVYLt|2!?31My^Si=c7b3$A z8(j5H>y#aMYv#Bg_UF-D)lLz|{4u{;Md~)O(CIFd0g?PuJ6Apsd4xtT5+j%5v|*#o zKGr3rRXnS%5G0bs!kwc>O`bHk4utRCIohc?gt+bS5nBDrXAOo2Z;%Kr{^6NB3udJ{ zW#R2j@m=JSYt}70OON?;y{QBHeCCi}jg!TkA>>dgNe`8tiu?DtcXlpKnfuyg4w;kbs3yWBMTBz!k`+^Qn>~{$|w$ua*|vVK{I{L4%PSU!$*%fr%_?vlz`f z<@mBY>|7jhjv+fl0IJ~Xlvfzo)LK+>eonxBEDbhX&8*12kQ$t($`W6`5)&mc`**}x zR0{TSNVkdRu$btp)EWOs)WBFyMOsvD!b~yIqX%h8-=vQJzY@K7D$b%J{EWX6)i_nEuMIfXIg+9r$=BQ^)0^m zsR3A%xZ68QhkKG`7#+e*En~e8j%qrn7}mzfM`^Q@%=d#=IGcu^H^oR6N+lgemE+yj zg^`j7QJ)a8=sbE3>BGm+^btPiBVr^WQiuA4SilO38|k! zoWqqn+dsm=j;e%-Y4oNp?J-H}aQY6%$fDmsAf-COlymRaoPjv4EI6|8zxXp1r zSfkr;-;pR=e};CwKW1hxSDR_TE36^m^*1vN5wi_5pD`?a+OX0Yf@x>T5oz|HnRWZ? 
zsN5WjX`!llbBziFp%jEWpaZejGfRCY)>|`M+cQr3j5+Hw3L@?r67D^k2~gb}huId0|)-@uj?|i!KVstsx+P z`!2-z@X3T%??PkoCIrL6Yg9V?IyU&X>M|4SGfO`kSvu)OG6H2dMGcW;*JhR;nRFTM zr?M1ALPm6c*;7e6Dw9*L+7oLsOKYCZhI>So@+hACA_}m!m+d&*HG+!iL9&2@(rL1& z5t7!b%+gv};}oPq31l3Z((!(>JV26oFtc=$OdcZ1t`LivMvGZXd{X|jTMuNG9#huC zohC~eDT$IwTHnh&t8qVr^RyQ89^n}mM~x|;+b{0}e7?8#0(uHb(w9B4sW?(cyrU*d zvWqfHC(EKHNvs8#rBmyX2iQqDY_HGDEIq!Vb$4dz^h3s%o*>{=VTqxVTsCEv{!11G zqMnsm`dIcr`h)hwooKCRb|wWg3T6||WLeaAc57ND7?{{%Dn`em0U#Y7;vBWlUcW7~ z?sQ#j^Bv<$Z;7}Gl(Z&fg1d;_bKCgRCP$SickGwrGuv;}#Xi{yyvYU;+_=-8_5?wA!0?`1G>v<-b}+dc#_n zkihd&;tA>V7w%=h*4X&HL5N_#c5gb#xn(MER=nT-Q}~$LUyn+s*4F(L&ZJ(g{?1u> ztm>$~sv?OO?uD%OZ5XLqldsnONo4+q%U&m=&&X5Hf_APcHSfqtT0OMa81I%~^jZ;D z*VfqK6(F&U^sSW(72JGg} z$bx?gCrYj~9rtdGRZOR1(MA4eWS5-GpM^7tBITpB8J+psZR1K1SpIdK5`<#tDL2Z( zsl9RU{31ih62r`8hKS{+hTg=_GpLp!`A}OK=+W&mZ+^q=vJn2I+hq~Ln%iYEb~99+ zd~;h+x$dyR z)mt-CWhHL)RT*Wvny^&TGz}~h@i^~TKX89tc?R)$0*v*H;Ue28flh|FE)CyWH%PQ~c1Hc|3_)^3*m7?HfB;Q+ zwYNNya&f{dz2z~`TxwW&;vkLD2%CGc)?*|?he))`6_XP&YB%+gF=G`=H(?-e?A?k> z&N0w8_O4eDcknU%dac>VyJqg9t--M;;l2f4{R z`ITo2lRBCyyeP?l=hU-ng+?Re`Ri;4csgbnkw!spUmP$})YE05nX=+^3Qd#2d zbg_oB_uUFq=O_@%B_YeDo0rR0E_bplcdlOUa%H)E=5p8keLwR5>Zre#Zk+On^;)|A zT6*@i^i9{(-BW?UY|A3jV_scv2&g8=5_vU>jIAGs@LgF>r|$7 zOy}ud^;0D2bf5Z~vP@47lFG|x()lmIE8$*w?-`OP?&(~wd?ycBnrWFxhr!t#8%#o~ z+nJ|uHZrm;J-jVl0^_nOtLtx1Qd^N*I6p>Y^?4 z8@2|I=w|N_+5I)3pRQ|$v0fb)HQcUusRWV#epvG|$ZtP(3cxKXB0Y*sP7lva8AY)< z$!RhedeDLJx6_G?nz+E9#NAP*8r+nKU+FDb@d92F{^P1&Jkce-MmKW}FK&gq@Wr>Q z&&jI4?1MjKo+P*|5+2Jusd>O78}v9?&QW`Xh-k_90-DAYEzKJ6$Ob)5N#|r>DcouF z6SQYdPe?^qMqKf~`3V5#pnuG>65P=r{ySl{AqaoZB7U2aYC7b_0$Q6PzE1-n};c^ z$K{7<_VYvPN72$y3hKH7aAz1?rLJC;3W(Y@B6 z)6RL|yS_cGEkM#l%e1zqHRvQYv{>$98ZP54Q^0G{&?SVfEfn{of#?#LKcfSRuVcy5 z3Psnswls=%rcWzevBO02Iz9OD07s8rW9XX37SQBMrr9s+cXvFs1i zHWw-mY!}})c++KNp`y%vMQm2q;wr6ir@%O8hfE#s zihl4P?Q~5Vf4}A1@z;H7jCDoy2YH|=F(cJ zw)vFLc_%aiHh2*y@On@wFH1RNryDJ|9WlJ0VEN9{E+8UN0h+^iAQl5633 zNRs!JTr-?|gpjjST@UY({fdK*x{P(?A)5Q1svsuz|M2zZaZOz7A8>Xg8IZ7qC7?`j zP(fQjTbF7JxS$}tq7_@KEuo#LwYK6?KtV%L1f?yDRdKmDNjtJAwuR$k{9@Dj364g;8ZkTKjvzU{QGC{OeT`qUS3_L^y2pzfcIVmn zOAEjpu!z>je<-L`l>2Wg@ZVIc4lk)JkuqC?8l|?y8z#z)6Fvu4jo|HtA8tCdpPznph8z*75%YrX zD;eucxIYM|{Z<(BpHqa6Dah&9@Ei6_8R}J5vVt2p+NgX2_?zMB+S3*Dsqvfoh^a(0}1}0_STE^ z3kBt;p;koFz7b3kcyHZ+h@@jG!-rPJXu`jCBBmw)mrf}z+ev~(3FWbEyvMd3q604rsld{Oh`S&I zB)CSJ6QJHvja4EWQMn{nl{7`SdCum^8@K$U>1p-&PBZih&+}AW!LAa^u9D#c_;Wn_ z+*PY_SHph8AqWeZ=dL0Ndyf1*^8X4dN{Rb|AsW@9*p@6uBd*mu#TXNjxDtn%O z_C8fKUh~tx1V0rDw$2hj-;2&D+~cp<<2T@{uM}2?X*-G~Z9=*V{qTtXq`lXPDQtS$ zhQE?REsU-wN&C|lBy6DB5-1Gm;|gQ8H1%ghp~%SGana*RMpTJ9*d7=CkEeC)78S%7 z(yu?uH@s=oWuc;={`bQ2Jpb{E?ZwD5QJCkCv71ASXNm6VEx7l`L;?A zIDerR;p@UbrNhI23y(OfFEoTdPjAvZ8xM~R<+q-ug=Pr7;Qy6Y#Npq>#{3e?PUn687H^C$AVrh3S;^}u*;e=^@f!+%EvnKN>8Z$& zH5su9?S`LL2y|l6&@p(EG32SNk?8So73w4M{x3$C{Oo!^ns0a6ZWM<%&RGi8>#{hv zS+DS#yD^8Ghn2fpP%4o>&Ijm&cTsRGWq6TzYY_)fDzq<5w5td8E{0}bp zv(wCZMx*FQp_94I_^a`U# zEEHSrkZ~~{;{ryX)}j#HF1w7HCa4sVI4&P?l;|h3K%sN%;$Lz_MP(Q>m{TqsYnR~L z&bbJHo>_x*q2$@o|GZjlvli>WiomyO+}~<6-o;9cZ)I1IZx!KNxfb#%)bI5k{Arh& zd=Y4eaI-MnN{>r>BP$-fA{m%#9`QMaXBHJ^%fcv6T5s6;rl3Vpik;c?YoTS?`m(4> z*mfR^jjdI}#PGGk(BIAFa{jo>TJFuBbeYpcgsl$w(Mb8hSZlcql1%(p*m?rP-E^`L z??Kd{2eZ+t{vT#87x8wIkre741N;89uVUYd^0md3$9TiC#suh7t2GUEF=g-%A0p6a zE0?W~S+?x%#tLVygskLuMv`;L6~S&vzNjTGuWu@xeTyh*s!;nuafKGH&l0shR$KfG zs>4IDCnVBJ`y4u@*38_V;CBCARvZJq8;*(kIm(%@$IP60_r%VQt0B!>4+*R^GC3dZ$q7*r>39D`L)<$M(kvCxafXCy!_ZNZ zs8Jai83nHvQ-M@yXmojM@tA*!V&a-c%65!WRO9<=1_*YUQi=s0@$bE%&W@@oE3?d; zX}PvS{@&u5nkM6@vg1NQnXV~{F^)QOV4GWi1N0s}PFU|TUHL}Bo`4alqSuu<36?R% 
z;n_yV7C}S1;ey2OCxE&`i}IwnA^>bb0rWPcDRZ*7{Jcl?2KXtWV%qqXF(=_*tUlY= zobbAGPpF2HO{2#OYb1?g@NeFGFJ*G4v`w5c=SKSl_tf|!^LDrmQql0vg>Tr6B{(fi)57jCw2RqCxfgrEws%8ZTOPG@}X zW2`EGa@V-nDthXD7m15cNPvz=LVTGiA*Q6DmM)1^y(?)HHY-~cUtcDLu%D@sy;0dN zuMuvAj#p?KeM|8mHeM6neYokMvKQMhb+J@m*HkM+Sq(c1x=$-lDyxL>ViJBPW9CTr zLWOUr7r&vLD5<4WWz06Lw28=6SGZxx6mIqh+=dwf+@>&~EHwZuDO6Jt6|jsgG||L*>#>2!p23_XZsY z#gIOZ5gbRq;k=Qa4z&_)RUaj*m%-ZG3wP>eGZftTJ8GS7>P-}@X7U25zD6afM9vN) z)oHu|8{YXR)L4x>9L61#H#Lw~60=RG2M{MKu*cy`1mn<)j2TkNFYjWq1CKychs_}+ zoE(6Q&j6&3xI=l`rGlx)`}ZPKUIEd5)H4GA@=Y750fy;hJt9~tFx9wQfsl3tuoJo_ z%ELGL=nYN!DTOUx)C-wgfi0s|^ZqP_&;=_Hl)%l#FS&n0_oRQ^6bP{Qii^VBZjM>D zMhBut#qPzs1Go>P_1Cedt+YNABujrEDyuBQC4?MAA` z(c}$&0?aN$N-&^ASfVV6ElDUz;~5kJe*+WcrLPgiLjGelS%}ioH&#dms;Wm=(4{c^ zMM-40$Ttgxe2zMO9u=nzNVLPg7xU>LN&(EeydbK`Z1X7*vVZtd;u(v}Dhw4`|9j`N}@D31~MF3cmSG2)zjlib#@-7>;HyKJWYF#zre())l7prjqk zl`|KH*UO-+pq>NGsCGq#&8O_Rv5_*i7hF7ikkU>08A_Tc{s&^ZZDT5s5@@)D+oT5D zn#6ZI=>UsQLFothr&(&55JMA&!Q2NGU-UZWJNusc@hSCe>>gtTviNYqjiR9wz3lVw zF;PM4WbBT|!dj)=o4m0h0}r1Jf}`jyon**;UoW!s>G_3jn^^)@r%vyakxeMN@}>O; zmBhAHv8hBz3T?7@yq`pkfrfT1JyE`Fb$Inqf3!OZTi`cgb#q9GcWT_?R+!2!R&E!9 z!kYI6*3#;sKI){IGc)jq#LPx_)$Y+=i>fc_nG94-&d?POu|YH~?qpDUqlejZyK0_? z>9n5c%y674eUr=@F*tp%l<|9jp&i02LS~6q*QV7=xpO_(mkGz(vo96m=^pIfih~Kw zX#z!j6KS$ny@Ea4)0j}bN4!*?s!$)+rTnsNPZL#JJ$xvi+B-KyMn(?vRj4Cku)5vx z&fp%g-Wf5N8i0ef;hn*n2*~M@0)D8MP|+;^fQqioq9)W!xQt*D!(SsYS~v}o@E-)b z|C%MVUJAS2&Ihe>DjH+#t>9r=5<^X37eMkug3SBD=rL$93li$SxZbZmuaTIq0N%b`Z3i=s>$+#SPgQKj$r)2>-*)nq#~AE^(c z{h1$GGqX=bpQ9v+nc4g+y;e=mMESmGyXK<eMZ| z86EI6x-C*vbRKpKk4V^T97@bE3eiC?XUG$AyQE+9s$hYh>f6L`D#Bb_B9@(Y zx%+;lIx+L3<0mjh37sO?0FM!rSF1QzxLZbfP_hx?vhKU#H;or!57OcL%HSjZqQ}|Z z_0FF?Hn|=uXFWIi;$@8&{FHw}xUYvz5DZTCi7yT}%=@H9*%-@mgTQ$3ejOnIHDXo9};vcjhBhH{9i%8DmlY(oLtrK@uqV0KHYJAwU|@;G1d;}eCG0@#5_!Z~f^LXk zF>M$yZM(8%dxhs(%4=Jts3<@%f@E-d2p+`7d}8Qu85)X}(9QjpKGS0C?w%|d7>^y1 z3VT3x9G=wOU5<|yw3`&|-R(Zt)FH*8RB*#Tn+VlcWrA-EMO9g`4<@+2Jsh@bRcnt( z!drZp57p{Xk&{HwB~)ie>r&tc+Hvewx)-dyb;gFe)c@)Ir?6g8EHqEkyXmp4E$d_S zcV{_dyCsPATDTRi8#{Ei+RUun1j_UojMyu zYmzE{BpV0Tu+Xs^t&>bQHiTD{)m0fi1x4P1sd0kiBcHlAk_{6SMF?briF+eihyaq8 zBBk49t9j3z7kwid;;4EVi6i_!!=Qr?L6(L~yU2OlbfzRlFBo4JrXE!kY zk3@CJ)VhIr+x?O{4QJt}Hn4B?KLLPjI=+Ej;C`~qD}XTs+Xi<2({hN_p@zc0p;a{J*mOWcl{ABBI3&5*CC?Sg*CIZ8S={t7;)Yq;l>BHjHiML zjpkYU5T#+=aTxXexuN7NqIOaf&aPQ^Wev%Fl0@u9ME1HXACZV=5^)$2E7o0EP9jc_ zh&QRn)W_I+2Q$aX;}YsIigZ7JOP<%@3s4++f9xsWg@bcWF*AhND}K()w0Z*!Wv8n^ zhr|H|&W^%`8%{zadnv_)QnLf=#TG8xuYWJAm%(ahX9!ML3M`+UA^9XAPW9lfg}@5a zL^+)HOj*ZHR_Q{bQaz{vo3mr4eV#5R=J$s57Qxd*gkQY}AJL2Fp#;+&O)evFq11au z0r7Pe9+?%@iq~g0C&afUWPGK}dl>$?5wq2KYJ0}c{rz0~#8<(-CtQ2St76BgFYKwp zS*e1i<^+2xn%dwFh%1r>5Qg;2Mk~V^7>0$TlJ(xkgvdb>eB8nsmx_S$S|3#(B$VRH-y*<7`>RjYK^#%{JWdpXNB&NLZ$+o4Iof7niq5sNUcVm$y3^k}xXgDV_G=H&tfVg6S2&d7q?C^>KLIi+fLl^;lcDOOy zfUaXlljFGuKgva|;in9;pA*?z8vg%lV!sKQH;pz=CTi(8+mKqgs%0K})J8njzeg0I1KItzYF9js!n7$0Fd*q7 z%$+2hMy$qM&U$N>HfWYMc$PM#ylAbdYAqx$GhkA}->wa?YgJY)taFCJ4jZ=mzj-Y6 zFxzHYZ5ejkGHRmp3&9f)#t-%0$sC^THL8>74- zR&9h?%RtAUAUN)M5L)#P^!nY-*O|{%lsJBWlM1#7U=R`Bv_Tv7ctFvZ+lz^g6H+Ku z&+JFRAjl^Xb1d)};9;g|{#B0|pp#ln*ptdL9kJKb9JEINdxX9wnd34@X|ai87le{lIN9ezf z=;(+=!*qhc7XdhPvYDFWmCX<{*V90LAScoSOw{mA z)Oz;T(_|zs(s`_R>*rr}CnI!FTCyJe?(k)j7~^YD&oLPPva<*$V+o`d`nN^Vzll_BH7OIXhdL zD(7C4w#sq%lk&B|=inFzsGPI*3}z{SaXKPoZTe z(w#M}<<13|zAkjX3F93Brh|pB&`K;JGmZcp>*mW$4TYAfe5!PU7k?(eY+S|xqBoC+ zOS>-%-Fi(UQrYhVO#2Eg`wFRD6Gn311(@a+T0ehGjc^!`@vSI?N$kZ!#J$u@MKP~n z*wbiUA@i?pTrJExXn{{;PNA|7m=CT)Ep6F#w?h5m+aM*P zSfb3vE(NNL{g6P*vS8?*Ux5qr5gwTw@?tf40so&wX#b0gTihKfhit4!C}exVylRlQhhT2lyhJ2Gaqf`Z(_p~K)Nn_JjR 
zj=J?iA|A0CsC`X`9sI8>85_1IcGUC{&z+(EB@Lwzibo$K(}uOoJH4uVj{$bWnmW#xd-+edg+7ca;=shBEo5%oCd(3M$5{??tK>Aam(Cr*_w5Ro%JAMZYs*1kG7Fr_geuLU@%L;prm zw!(GR_wVptAcI&h5JlJne!C5(2^3mygBF4ZmfIuc(5n0O_7m|fNxO8oa9P9MT4hGl zJ+tu)^O<}86P^yG=5B>dgqA_VIpRMGAcrhgikngH0yOb(%Zt0T~`XZ5v2knkQ~@Pk4X zt-|QDeD0ri!p}AaOJ^i4m?kpEp7Dh)_ z4x|2a%@0m9iU7F(-9T*)uWMq2Zt4YVCLf|XwGL9BtA^w7p?TE+ERcI`!J77Ep5_$z_rfr%2N=!iFh<7TjizRRzX0}0EI?zPha;H{rH zERqcopnQ;HsSd3_awzynm=OjSO$>#1Y{yg{5HFpIl*1iYGOpE&+r6IvG?%pNCZ&k^ z-q=ia%9?B$O~24h-_aLax?7YE)1N0s%CElUfw1~;(IeNPuIsX0mvDqUB7UF{xJ(aG z)X<$ElSmZ~XhnDfo2i~-p#4*HieiJ-KfSouN(WTXN_?}TDy1&vuvHsi*7`v@kUs7Y z(igV)HkqCFP0?MTQd;a<-)aKKd@+F@cUz-Xq0<-JGPMEI&n9a9pGc``gd_ZY=&EUFCF%qDq7t-9kpvyt z(ThAs#5Gdp=ipkf=!brG+ertarz!EZ-61`+A71bI8)@|usrl5CnRUi-V7OQR;{oCy zNEbdY=^Y{*=g!v++6j^MQtlmeH6BuIOlL4MHY*{;Fz#!b*)JzbE#- z$^2blrvp2$)Y5I*s4}49Ss<(Iru08+K< z2Hv^jVVs)!8`2bM`D~8o zJ&Gxl5UJbDjFn|Mg$36NDvYi(*>3Q%&ICqDOqOKX^0H0Y9ofoakA|uk@J#f&@WmcC z{iwAaaZ1~kHPrVtRE(>McmVjpB>zq7Qw6nhIhYCwOjg5CUr3rZxPcpb8O0 zU~_{O$#Zu(NOpApr9gLq6ypvchUcHg zuzv+wNDO&(I?&-ENcET-=KEKH9?uHA>28m14dun9_qXkxHB=ZglZ4S=kTY|h1;e;k zNf3t!CYA)=L_1(-FcG=v29*+Qpny9O~@{Q#6?9-L5T|HR48uIDA=FWaES`# z{73-p;%1WoDrejQRL;#JF{JV|5_l82+<_&Caew0)#E<~mMR9*4X;2__geeZBmNY|B zL8$pXCiYz%lTs!t=A=U_pB$W`i8z7shMfrVfCWFCWW~3!!|`RhNtgJbAgXV~5@w}G z|3Qn*?LTNyA)%-F1wm94C=}d4XfgIc_meFUSZ~Ri&^)$=M7$K5$5kW1Is?hcjTZ$` zJCFyrI^@Ca5T1lV1kuXi_8yW5f~eIX+;AI@Cl~T@-$4>J;SoRKet)aEtE8Drn9aBA z#Um8lr?*L4d8B)nC^STPP;%xy8AAEDL1-?c_^QVs3TJz?(WN6hmDkLEQ(k3{l>lRPU?mF-g8dNsBv`9kZ1kKNvfl#s^;W+PiOO)P33g zY3!lK-a|t5;cL6E6FC~HC5j;Nzjho_Ciw>cPo5g&F`6pIazStF(4W1axBQ|v{h}v_KrpXC_d}Da z^w(;$gjzjSA~<07g-uwz3kfgC*!UdSP~;dkbf1JuhEgdtZ|F#f>R`6bjpI+_d;idd z_?LgDr#4wy^wj$&g$K2@%Bn0BbQU6!w{ADtV}^@lFQ6YAwKYPV zpRuo}^&IVZm~g$|LitGx<<*=;OwL|(H7aXkn+_YTms|(w;;aiMsJV>I+Gv)>RSJu+ z`qPH(Ysb<x8?%~~OaVw&VAr<`Ue_Y&tk2v@W0CarSR|eG+0&%jW+AsW zc!50TU7C^?r%f~EST;@Gl-($aIc(q#J&Ae2&@PX8g}o;ouO@7e8op4sIYZc zZSx5h7;N1o2YiB{s*(AVF!Oj!s|<90%87az_FraMFuORI0&c)=imAEA#rn?rVn(99 zdxjk{QyF*UNiF1cDZ058f3#iGd@&=lU5Nr5?E@XHTkKzKH;mk))|ALnbSEmh7hR*g z(mv2n^h)?ZKkOPMj~dx5vThN;R3*RfNs5N-gSW0M(i>fL%DIQga&7mynrqfCW-!0H zm`i_*lv|BCCas=5_5?nUi6(NMz|+fV66yq|eIQm=a_?)aq>kr5*@ z)#=oLY1c|lB>v#C4$;+|z+hzeJgGea-hgK4XTOsv;C10d^MNaK);!oY>_q#(7>#3z zYxoIDq?s>pE)YbwM7Q_u?ine&suCT4O)&OA@wVu9qKlV|T0FNTY0YeKf`}BHW8C_jS9Z%$&Aaj)&04zXJ6PDJ$cL)Yw`K$IY8Z>*Nx?5LbUw& z1y@b=_A%ANz45&Y%^qKo>|X%?V-?w0lig4^sc~pSUDJpq2ckU9r!KmIH~j`D$oUCp zp2L3>{JIW)C1EFk{P!pkP^o&2A$-WcK?~Uc`Em0F8x|{h>Q?CvR`o!aETR0#iBiHI z_`?8E+IF~PXeI{)Y#xL7*Mdr62!khXK8dj`=!}vh7M2_@%9~&ABB#k)WP!d~y2W&S z9k+s*LSG!f75uS4zS+cNqvdX5ER<;sbO+{hHg~|bA~)2FSNDgP_1N!6= zHiXbeS%ZC$!j33rpFUxy`V;$vnOY%g_{M|){c1zTZL$yG=kUC*Z=w?DmE{?#~A>9rC zlBhNh1T%7~&P2fA}`!2(%;fJ!mj)|pQ!nSmjo5k;+1qSeXIba(D;e1a2z zjz*dt>Q7M60CUI?iJl%o)$(rx&fVNfKF6EP8{+0B&F7bB*9h%tu#dK58PSejw(|{ZmN6c zu*$D}BYo*ejgKyFv}y1GU-$YG>j&y}Zlasn>o1cDOu_3c*YS?2^T$473g~~w z*7}>l&Km-DUgF-OqtFU4gTd@+yO~oZH`Bwp^GYW@v-mZuzK-FewXU}0!|uFLSWC36&-S&^~<%=!(y5w zdN?k!!+(kdJRuC9lU}MzzavpkchNuHdc1;lmIFD|WA-2ZEl~uhoc{%P0o93Rd%p79f(GKDhG6iWhkftN6F%5}zX;ZU9%0$H zHZ+ud60|@*QHG8{9~_a^h`F>UWOgT2f=pyifZI>W?T|Ozffp3AU1wk%GbiD>4hV@4 zWvG*m#j(F-l;+D~{FE2|zydgyqChL4defsEcD;Ygd)#e^FBmnJLdK3E;g1=968?=q zND_g=2t1PwPMOyPxvqu z!D?Od4dXAd^V(CrtN$ZZw=eM0-M;`_4|5D`1Aq3g6g)yHe5p4hRX!VFe<}Z(N63}W^ zt&gTW9Q!kg5W6FMHRVI<#R@C`eULsX0{yBL@E-?I(h7}#!Q{12ir>~KH9KUp-YaK= zAYbK$_p3*Vc=3H+=ASBO#Qv&e1*7j(-H=m(ti->xju+gAewzBAl8_gpZ`P8(ED7_(SU9E5%@NtUt zH>GCLG&l@z4CK}an01A1!1g&tuA>xZzAoH#i7gE1LE+wp$b56Azq;6<%XfZQ<@|7m z^TVB7={+jA^c`!Y9wyj#ROa>V%H1`GdVi#RCNY6T<}u|l^_+{67X9KX9YUDBr9&x0 
zST~xNKT_V&4+$juv)SQc6R*{rdeN{@IDLS6)K`^4N<2>L zejj@)|1X_A`bItw)z|VDi)I}gwitBr+%2#t(N8nLq;^Ms!LRw5-wqoK=2BF|!q4sx zhf8YG-q_3e!(gQQsjfzWfXNC#=YSh}rq^sAVL6{~`zimH$k7rH(^8Z~HOkv?^pUE~ zTT!Wc=xILsQ1zbNdh~(nlH7juiK^7warCjO)!TWrPxXo1bTXg6doSo* zI)8dwVEQ(nyK|4Ndki+PBD96cR}N1z+48{(rZ%75Y$(w(dmo$rlTYXmhePCO{f()6 z%>4TvK_i3=ge>K+7oA+qW7AjveZI+66LU=a9@p6OdVm))4k-le5`!Uyn%(qLrR-;y z6A!njI~@}%frJ9gf%=|0r+=jqI3qG&aYu~C0E!h`yS5@ zJeN|HXC_p#hTfu6@X5g!^iuE0n(>a<1zu6~Z9m_a@-G&Roz>DfT?E6ZEyq=F{`|;l62N*>oPYW}1%FCm<&@m7K}Y1Kiyv{5o>=DPpM5otu;)SO zL^=7%P6AEmt_1P%PmJLinP!2E7MKR+xyxXl^Uo{EyJ}oqsq+;Xb;^3#5sy}ByM)^k z#2@Qwm1BjZ20JSyno4QMH?azXASfQJ;Ck>eSar1gZx`A68lQVu6_-t<3a&n(*@GJ zMyQqMYqm^h8gQelT)Q#k7;^~0}{>(6`>JLX@|9h`Ug ze1AXic4sTW27eRFH#r;r-DLctsrp|+#bN&1<60HlBX6095p)kc0^IfzKKK>roI&ic zT&n~~6%=9!-{qH&7#|4vx)70j8&lyHqVQ&F7hF4nNb0}W@l){ZyYMrw?UI7ad1Vz% z+n`Aw4QPF6O?l^Su#uY!mUWMgcY#WhoH;YPqhqvjp9JRE@KbKI!%9v}$LLy%zDwEH zMP%s83#JR+wF-FPk@KB?wKmj)+gUUNw<~DGHPSpebJg7zj`v#%DHY56r9w)@|K>MN znM*H@oep0QzMYmfQ%9e&`N~OQXrKPu2z`4*cUO3WwduiG>m`Z4E8==u!{LNZ8&RC- z1XuY*jB8jMVAcVXjCr^gQvtQE?Lr0Px%{q}veC;LMrUMtA@@(YE;J|(lM zt47Ig{hhN40OEm0mYdj&_0XM_Zk6J*x<0Lo)pm`A0s8oOQkax6Qk&qEHWYp zJqO~2&FyMVgSLDq!E0KznK)%rPaN7zw^x{5PxzPdL325BeVv_o50cNcdrRfq?0ZjS zw><3h>sER$R`*&VbZeTL?wvVT1{0*m1ug5r5yzrWv0lN4jn+kkkWFPltHjt^Y1b}bmUfx7^Qb{iZ3ez0 z&z1K8mMpXx2JO5a?E)OHvVYHelrOhy{7om?R2YlT7_*$#+Q?7ubi9?N%bYxR{}}(zwV&_ z(FrXtQNcKS>HMU2IypVm1u)<^tRy>Krh}`j)nt1(gL%nIZ(7!^GHc%i3S!MF_ItmI z>P0Pk5*;41-rbRSY|&JOK=)Blg~P)@znOS)k!tJ`bkVL&l-jy@2UwAI`c2i0H}6M@ zik1kG2N}#m^vu-Y5pwI0byVJ&!1j-UhlEZgdb4toF47poZ43n+O*}`nHAbuv$U^K^4i-V z;XNv}ZyS#ONg#)tGc&bpk2XE;zW$Fwr}kYdy>Q=}5q+%AFZ;dl&_15^OZ;*j9I#sH z;LyJK!iS7)THxh0P?Bg(NFlTjU?IK-71`a`w z?6wK)LM(awCw}%*_tIyK#!~xs;@ zW-n^*iM^t!j|5IKZm#XVRYAYiL+hvk#B!5%kxJ^P3wRkn)Iq;}bDouc+ctkq!{jDi zL|D?-q9V1>baSO!2vqNv9x@fRmAZ-qf-{qwuFPK3LoZ5tS=cg=VgO=z5kfzf+s79t zn(24bbH*AMR#trbXZqr6x`=^|x9{(igU3ltY-%mEYZp1`MY=%>n8p$_PoTKL7K_+# z{i!gk#vh4b5bR7#*@%$8R$sORSkA-|QO)HAW7afYd0^_awONc^%Ii8HV7Pzb^>FF$!53vY3N0uPU(z9ZPLCx{E}6e z%nR^PeFTC#<|98RJr7UU4gukYJ9qaMHLypU0sLIBfzAX<4r0U#?K|5Bz}pw&*JY*W zHyi?g(oDb8(C{w%;_p;28C2&KM^*0+QjHqaqw#mrNoOCe?gkSU-o!YUfnKC2PHzku z5!HC5xg0OsOiu0Fowf!Tvuh!J@T_al6IX7oMrU*YGPz1%GOxCnS5v##azEgV05%~; zOyAAr%KV%f|M{+@=RFeXY4OR1IwW7%jkJuALSxubYf_E+zT zrfRV8ks)r2PhagzeMr3~n~vqks(p*!P1e>2skRqa!_aRpOy$M79NNWZ?POB>6f-@A zndi6tii2M4q^CUaQg=DEQ>tqRh9;}OIp}quP8mAmq!-p##iu1+KaB;{nsgfS>s~#E zPOG3NS0zs!|4Piffy70O z3-iN4Co5Xgzi!^;MZITPwc1Y4H>8>MjrC?0+j~ zpna-wAG1_>UJiQtj>L?1XZnIKm_-kb9TMle`l+A%Nv)^;;k3;J$|t2NLvLATw7^xf zUO&3TyVfhEE2Uj`@K<0=?6eY!2I)J$>~3jFpYWxFe$7e0TI`inGjzK*$d07pgQ*Dy z`ZX#_|8bLkU!mItOm7G{eN!u<)~Ty1-dmT_Wv2g@(q6s0`S`v+t#rC7`jFY^Vt&45 zHr-_h+@qrS7&1B(?DKx?G;FCexF@%aO?R#I4C9%5Si`A{OcMAHc5S-s;-6+)4!76^ z_N7ej+L3sCK8au#xsJmVQkY}$j-uCLDV^&H1hCYqg@npkp6E=Q%H0m6#NZ=ZwMlEJqAd)F>2pXLsUBt3#@fp>?P;&-ChbC^otOS64SNA;Mq9O`sAPM& z&Yt!j+ZkxpjxlRrbQkvqNt)JQ95eB;JLx#n{ak9Vy?ptz)GwK}L^`aA2^HlA*`YrPU1$@SI&xvL)EP+d=M$p( z!RP$Xfwfr&wVTgk{~~lM@#t$J86&-)*_6hh+uH~CtBB=p{ z>%e%SVz1@vT-%XcSg?U6@(uyl76>h8rM+BaPm_&}1+7hN*mx}RF-=@Swl5~p59quQ zKHSX@EfAQ#%(dKF(JF#1WlMc-E!`?I?aO7p53=mbt&PRQ-dtEcu1$anBQ;ed0Dc#^ z5UQ-X(6lGl61{?J39>}3s1+jN=ehjppjzeAgjOUB!9+h*XzE&J*`3Rr3bG7FHTiPW z&Rq6n&{Fx+gia(3qo|J7Lep=nET83aCxR?na$7|Uwk+3{m0K@m7WhCFADg#Q(JD$*=n-LaD${sR4|riAJtbSgKO{_>LsE!bhNNyKh++=8TrPLXQy<(WZHoE1&-!F9&OTOIor+oi3Py8yj zAt*td8|N=Hj+(T`@5OqlMU>kr<_`oJBm|sL14G_wqUR?9fNt8zHHUUO>rS#DpbA>m5OJKv4$+*-) z-EN$aJqooR9q2^FQ5+YxO}2~=#H0}U*%tK!g_Q< zPWvoB4wIV6{8XaCJlqE*zRE-O+Bb($LG77I#RIGIS4FvayS&=(M{HlmNPFEF+v9R# 
z{DRvSjP)Xl{2*re{e9yj$J3F?gCggtVT-+4QE}>_YO%t4>VXO#mrwPnMkpvLx|%r+ z8j!mPUOpvCN|%%d7>8DZp1mknX^8eAUSP{*z6(egmQDSHg(38Xx#}SRuU(7W8?aFA zo2?#Lt)?OmdkXeOK%RRVQqZTktDmVr=A!&-`IV@Z5K1loK=0s&BjEU;z!bTH-Fz1S zA_cphm;qJryMlX6f`VBjVb7uhQ|oeqg`&dvE56_@5MyMF9jz?-(Zzg2?&sXLo;06d zi%ride=P^BPtT`z@6hsWprJTiqS)-9@>g>VpI@`g$iX>LQ3A6nWDFD}=M)F&g5cT^ zOx-rmS`i>w8fab+Od=!d*L$WDattDI%c9L@;Fpif$iIG!pL}*g&Jp4EsAhm$x9t<2 zj7q6HzvB<92>Sl&!`4JGzbwd(2hCUWg_tMLc(mL&)kwK&ytH1(Ja7T;Y_csw7?qO<1$Y| zZz9(HvG-k5ANBLRW2I<}hP>;SH|&O`W1X*JKEQ~~dtG3~?-rRJXGaE05|fc8E?JbA zEKE!mPlpS81BoHncgO=H6MZ6my>xN%@;|bRJzIvs{+Xv~UKULMxaX@$Fs^)}h|bI+ zf<|U@AY=i3+$Vv|sJrZTtpAJ-itM4M!HQ00Ha_4EJm6gcwFU58FFqpV&B4oUp*TM! z7ME`?NNN)mJuldGkU;XOP;Q^wXEL>q`KmuKfx5-|kl<-5l->F?05IM_5(H|3tL+b5 zX7;-Swy{~r;ng300by=Le>g6So$d~ep!k;p2$%0jHVm+K4a_7c@7HBi=n*LoL^~1w zwXpx}4?ISGa;`%hg8a~@L4I-o_87?siX9V>qRZa;A83?m(tC)zcrw^0vLne{fJhd) zjyTvaal5!C9qd5jdIV*!PYi{=g z0z+hu2bn6f`Pc5cVF_CUO`m4-v)vDz=rMaEU|$_2Nt%Tzj)EpI>jTJnv&+=Iic7ou z-xm~T=zmc!%6r5o1)0`mbAP)FUW9_53It$iG~6nnd<9b$Xo|??lJ5vAo=7V z)1WMF#$8ZCZ}JymEIVnIFRF{?>OIM?VfNt0@Af1W|3-;snaUF0&8DIc`r9RfQou<$ zhs&g1&P}_k*37NAfmP^D_9qwTbC+9%p@K$J%uUqI^tw%NaBr#HYxnE3veemQGjRa( ztrBgXls??IM${aap6t8zvakB`%&*JLI|UcwmenBBoT^$75M&I-0$A}zwa)(P`&#_%Kv$B{G7`j`_li3o%)^YLY$RU;cgt{ zUr$3s{)I8c*l1X)iI4{K;Aa*_kG9Oid`#CNJZB z0{~uq#NhTG^@vjj0KEE$RRYb8boVFu5~EFA=RKgMUs={J;lD!(trFyXX&G~Ylt%d8 z0)Dq27;u0CY%@3pScfTNvL`Lm&4wt>(A-jzb?R{VM@I8`Q3FxwonlR%?AGm_>b@pn zRp5#SRGb$ZgsP^g%H%}DbjtjLNMD>_nC@jh^}ajc?Y<6Vc7<4f5SbMp+7%z@AyBCc z$JMS_YE``7ABNpQV>QhVc~o0q|3PG>Rye5T|Dyn#Wv7{tN9puhKqZIW$z3}}`#!Tw?Fs*kWjnS9lnn2SE~H+;Igz#nS5I7er__8^NA^$c8Jab8sV z54e!u%x1hx!<=Nr$-C}gMsY&sbb(>I(A{(W-6;mESI+GAJt9YWbw1$U!tp$yb{Ta9 z=2wSOf7$%4)X3duKpta^kVOaYyg6_XntU1STHqj62#X1J`}Lv_BqZTqnljE7$Ii8(`z?F-%50U-l=hU~rpBhKQs8ZEr&PnyQ!hNkfs z{M~4B+(Inxei)j^G?CW?f&L9ifB=KQA$Q^bA*CXQKjIH-=8sU_5q~H+^5M`VMBj%} zUd*i?wkCkTdY$mTC%n&svs%X!?2#Qb?B#2b$)1rRUL)Qa;Sb*op?=h{sXJXWMosvw zsLI8(4RD;goxY$m@mOEn(Y~oBH@)cIHK2*W&43CT+4TTuoJxW)u;7BiV~RU$d-+4k zi%kwNd>#(dd*m(Jl1j(`1Z~fK2&y!Zgx^H|iF{Ijum!Dk2bYjo)?Mln3bFhYl3)#q zWu68R%TDgkhO<9Ekpy8u0ZN$eAdDwaVcY}~8;xvUT*9+p81oVd!d?Tah<_Fgmj2VO2r>YaA3U@G!d%>;xLe_fNt3vOCP$ENs5nznb zQ7c1V%v!&SO0HGfiKfZI(DDD(G~omRZjF+KP(O0N3=5m();yULaYV}A@`q+Dz=wR7 zzkw)YEZlx2ZN;j3@if3H!5r8RkP7Ai#r?+}$a{?b`iDPMX>o?*zwn27w9g#n=T}sC z3KD)?u`u{AtrPwi&6mjWh^H66#OZ-ba6H{jRo z<2%HX;2oUijQ46y{I$T+Qv{UBg%b{llnV|9lPzD)bp|da$&Ps35h0lqRZ zKm{u-spNS9=kMENG^tJY z|DZbJ0q|{`3UD|DCKe~U1JU?a&;T%#h-oER?hr3OaXB&5!TuAxResYwr1=yVw(=fO zCwY&ox(@(7sV(3GLuv|G)c_n-;rbiizy~BQg#8XC1p2rRnGdFjA~DIBm}GJ~1|+^U zIq|J2knK6YTYtTrX?n!{GYEvc2k3fuG6}DP>Vc#pImwzl#hyHMdGcff@(=sRBE zJ5kPk5fsyy5DrHImnGI2bK+*a*3g(>ossUo&K+CMclA0=0>>8u$2$|9@4O6pC{cIM zeZB9#_VGjeaC*Eg;6?|OzDHoZKK%J0b}L_cihVNxh9De(f|s(1AhJyRS6V{JA5?$9 z-%*+@D=op~Y5z(FPh(eF0^ILDWO|gQ`jr-6JUPrd_O`NUqYIsZuMho4=xKwg3`0l% zO(ERml3p0kdkc?$>#F$nQF>K(XcXfuoN6anR`lCMIeRD&mL(1a9+9-l+r=0FKLDD{ zdXN*>SAi`dx_4xR56BF}jKORQfs}YU?{`O4C*h?JRDhlb?^?{fafd93I|i6Gt>iwv zgALGBypsRqj?QloD?=5G^xoxBlsM5WV5Rq3c9`K%{^L8?Pb>dGcaOoD8eCjra?MTu ztyyWgyp*EY>(IO`xVWOiF+jh`K*}e^(rq z91z?(Y&RV4mtlj!m2m62v3s!IJbU1(pQZbStaIEhf3i*flR5=?PbSOJkKOd4?j4xM zX5YLAMhJ%XynEP?+j>kP%Ppp*{7vA zX33qF@yz>oc3otalF$ctc8zDBC-9R#eq@&^aV4JzV}8uYE;j3qaY`kOf&p#KEDAaf z9kmc<6xLX#2QYYu=c!O;FwRrV?!eOV;CRf(*$*jV1K6d?r7+q|PFz&RYn511-wm32 z?`c0`(c4LW-=qLG9_t$ydLuG))%pGY|Mz`AA075u>$$AwzMl20XFXnQJN#sd4+h$WgnxTrQvF=~ z0_KP2=^gM%+kzFaZsx{1lgRHU3a8`b{Iz%hYwH=@$xPqR1t1Onyw2gLR%lw$aCimh z`xMLw+fRJWR!W!2aC!vJyi6_1Qq5Da3s`>qeA&W;GIOb^X*>%Bg=<2KGU~|8gd-^- 
z+an>*mNnml&%)*o>l~M`ISRZkVA&XHoujOpg9*QGrc2mNg^*liovEyv>B>E`f<3CU zoQ;IgYJGxgdBQt3T5`YRBmMf$d+q%&Gajp_MoV(csk+ZEh0QJwoBg-2*)KNEexYS{ z$s!0eFzcoxu47s2r)uB4{_n?e8js zoyJD$G741ZRmE=Z5x44O^O(kl0j)S(?~)dZxx|DL&{%|Fg$rooVh@fc@Ue{`!&XEv zP0PxQiG!$A6ef_39$~Kg-t-d#40kLAwyy7$U0>d7_jN7vRl<_2IH z4uz9H)$rG@i?as^rwd``z0R_9Vlb14+$(9)JmZ`J(Cc!v4^EF6>Vv6szS0c6ASb>F0wQEcDkT zd6&igcvqI~ub-^r#}0U}RKLzA0RweVTv~N!k`7hl3KOK%N?wf?I9911ub^{@2$2gB zf**P%QzJ50y4_Byi4oaB_xD`FH?uGFP#Ep()B&*NQwGJdlVI6SzuR-*1ex;K2NmUH z4x$3Z2NaZlnW|*NS!jX0ub>AXCqF$`m(NYW!u%~pFa3_5uk(YA)+3FQL10>cJ?wEY z(GFntd4fD~K+vo=He*(2D+f3^qqq|U4}anSMa)mO43wFFZy9N0I2q$zZ2CLjBO;cU zflV*KMj9V#gxFi!s=ngJFYBRiceSE4S2UQcwwKaByVFnm9jM{5m!Z3C)^S<*D{g{Y zUpArthRdWB{O(_H(9W;Ct#(sz>J>sxA^WC|b6p`=3dPMj{wa82;TNKY#mD|8Le=JrrS)CXi8W#p5~F+3xtm(7y*_SD`ryF<|-LaJr| zaB%iku2z&YiYB8%9jH(TD4MhiJmZN=@@h5l2`BFPS-%-Z)3e$KcZ!obk|*297x6Mg zFV!Wd^$>`?+_mlDb5e+(ySDW^E#dYAB(J(hzTzy`vS2K9w4+g{U0Qq1(yTo?Wmg}8 zZt7?Oq3xQbRf}z|e#~t`+vz5nb^3x^IUPbUIZ7g^fr+yS(HfY7}Nt*gR#!kJ(NhLf`F*i7m79v*JNt1VSCl~FTY z*(JIcG_?N!LxIKgZ>6azMf$y$pmr@&E8&;9B*}o^GPR3I4L8l?J1faRpXq=pn+vWB zzjt-k1^QeEjwM{g+%BOFd0L`_7dCL^%Ue}!E*7VQDmHTf?LFWmoRnO;0CsLQgyR$o z+)R?WG0EP%%(@D*0mhqf=taDFN#D3`Z_3u9-Dbj2Ks)1MYU73G=eA$nF!@-6euHM^ z6pzz~7o9p&pM27Dxph+DB3?Zp>sF-UvSrn;?f)UqvUEM+VtPW+c`LH(38nPw%Bpa% zi@lz&R4))l?yDAM*G$*CnF{Mn7h%^TE_ykisT1B`1S3}pfS7@r!fokQ-yAz_*oW)1 z%2Ulbb)To;qkelNp4X9|)M<>x+dAY)9c!#B9M~z@Tm50)=qff`>uT|cxD?sG#9-HL zDQD`B!7=-_OifseN9E+@Q7x_+UUkv0mKiUukA|}WOU2NLO)eGiXlc4wIexjdIy&so z27!+_5mNk52aRq`v}B?A|WQoyec->h-rYRopdP z7|}1(&}C%y99g@qx`|O>9}`!TXi1(PH!l(0|J1dUZY?byS3Q7w91uu< zhJx|x{#&q^veziS{@R8Da}pf-@-JhX#N(|jS9-BK_uaC}xJ77%uohL=p|zyO*J>-j zS+>&^pQN5=4rBso%99zYGmjr66fCkDFOs*-OcycvoUUHfXuJqDS)=JeDP<|e9bymZ4Htmq6L%l^uLoXo-CO$8`|h{g zKXFa7-B!k~j5Xqp8#mnUDh(;M)v`jdH0Ccbn zeqA6@S%U;(kCLru!RGf~W9E1~2%NMC0z=)vj=FePYovRbj=W@6mfC^eu7Ov&;#Jm2 zPiv${yNv##*qJ_**J#Sx0>?5n{0yJE4@&LO2{A?@g|)ba9lDH3<1rt|^9aj6<}=3; zWgQz0!BXC1kw^R3E00kun)ji73aT9C?eHM zK(fBgr|{08@3IEpWguy1xtBkqYk1ch{C z(TnheE)hHjf>72q{6%V3h0V~F|Dq)CXTxzlrC#>iQW)-BSsMJy(v6zE?&oKfb}U`9 zB{gEV_7f!&n8b`qC~_nTmid#lTpu*!+3(PH|vyjT66i~2WP#6BdrXLl{7!JDm8>Jtxcg(It< zY|y4tgCTZ;N?A)WiRFh#A)#Hn#_Q1&8%8jRTIvcJN8{z(Fq&qPO z7_8R_O1z41E@Y9o9_$jEv=IWk=9dj8h{2L|>redU?lCYJ6ILySzn5p0o?f{Whk&|G z+$Qf$)O%j+JjHvG^SGjEmZE8rLOnyFeoUdBu24rR)Q>9EQx)nd3iTrjHC(_ytWb|r zs6!Q9u8JyGm(I@IijwG8T$o2axbp?#GhM)q8ixyg?r<3}y5ghA$MpVaqY{NlJP>-cP z*Bq{fBMYvn)@JF7yy>H+hQGp<@3e7s1#Dqg%nsY=J9z#v{7%zqTlnSjGF$j}F{QTf z6Y3IM_`asU*}`keU$BL5jw!Z$8T@$m|HhQzV$ToUi(<0mGRpkY?(FHLJZKKoF`L@xs zo6NS+lgnA#=#eqGw$X#sIkwT7rfeIo!GsTLT42K?eBmRTGHrDyy2BqCeYqB+IgRXF zTwF@eFR47gWR2gdH>#B7Mg{lsqGk09g>RQo6ZZK1t>NsF(}xz;H`eR#dEqXS{z~A5 zua{i?ddW|I**E>>mwvUR@+(A5dc!Y)dv&33dI|0u>AxF-OG*0mfel|SIlWVgdtk_! 
zs!$9-jJD!iUH!i(fd_6I4%^7kY@hc9hT)d+8*cM8H-sGpmQ&;5g}Px0y#r&e?#K`N z_5B);+|d{JYdO-TAKb6~$X)#xL0w0F*Dne3dRf0cFlfXZ{fdGHZR?jG6uYNiPEh#K ze)AbRo#9iMhm)Ac5`qq%>6buxs^PNWiy1!eBgd|hm7RYd+PQgX=boXRhle)8wJ34$ zsVs|h?L@1p{ifpN7X4smtSd1aI zRiU@NbWK{J4!XEfHLoz+PpOmVOtiJM3gPyK(cE(9^oO zO$%{cMDcHE$7nuY<;JBh5yQLaaX-AJfx{4MCHX4 zq;9JX6YgtT%ihR{LlxFJ$Li)BNA{^ZJ=MExqUKsV9_b$Qmb&e&)#Z0=yy;i{$To4= zO{f!caF>vs{fni>f(w2(W-DV>;Wh{yJ%d+C28}H));S?{ zb3&0pAb-T08!J=U4O*hF1N8}2%M%W?Bc^ZxZf5_PUgi`TB=64vbf>OVsTMBjzI%Ob31Yy zdQtr5mZb2p^>AbJVIauP?&f2E*>XahqOe*lbrvgIf0NoLv@LFETTJJ- z`5#Y-R3&2NwN@)%AE{}qcKI<#8Ngzk)e$1G+KEX=&7%c^c(Wggadu2TlC2sK+_ACZ=4&YHW z;+x)ebWd6JxRLwI(nGtd$9JWNwO2pfo<6RndO}P3_{M5wW4fYb$B>&H?iu&C?Qn~- zWJTdtgeOc{if@;42!zfe5 zPVa&!cEt9UYpc@QsO_SQPpa0sl_k2Q9q39_Qr5>kJ3B+W5?!x&dKt@Fl_rnqzp!nf0gf>+7UTVArsQ07~Nk1pRreB+}N zo6h-aJik;m-#zx%ZEt>IJ)v!E)|R!pc6s<#dH5hv2%uFS-uNi<_+IMn~92T}dXm)SBN8>7m=ML6+y-}*9LV6-64*IOZ*!_lEk;a)tT{9PV=0}=l zF4^{GJK6~8pqXM@>yBSD#=1nsD!O8ordZds15vYrOtT&|&Kle`>j7kgjFDfF2f`jC#pJ*MedD18`=_;R~NOYF=}D$-r)LU!Kcm!w?ySP zMitaW6y(Pp14a9)!e*(~RE2UM6~U*6W$~FeRmW%J@x24FOkfx@YCJO~l!+b3q=ztz z!kFjBc70ybZdrMA8g)Evtuka*W(2-cWlQs>f5&CgPLyIj*l$A zVLLvVoQzEgtBU0zC|y>OM;eO$_|)Tc^8Ojs-VPSDp3S zGN#T|ZFO~*!|({>hcF@u<1U3!pEZ&&9#&URIgE%fMueeL5XMsq<8d~Ogwa@Cz2q=- zn)|FUgkeb-FDZ-$IO}wFj(hg1A`AQvMzv4R-7>XHJt1!i@vk4zf!T{3S9Q#ag5tm9 z@=pbB36>I~v~dH{vQCGqQ%MH?^5r!Js?nYmNrp28svwO8O4m6ZRBY`>21|N1pZGh2 z1KZK{Nrr@&2?^>+Wgk3RlBDGm-K#7m75_9?Qmg5??n{pt>OOeXl2L6{hbP2Ls!h`Z z* zH{!-rOJoTQ$?Dwig#{a0+`B{K0_w3X%pQ`E9o*>N1LHKJE6X0*>c+NS=09pLo>F|g zqFP&(&1|Wm*3}-@S_s)N%T!LduyMc1p|AUrYXoGRIq}B*SSZZO zCw_0)Uy&Vmu~3MSK>i8=FBA$z62 z{IpOAlEHSFw|#}e03gH*4&K~NTJGaQ^5i--!0W41XRGNp-~PbW$+&^QeOoR5$9I@X zi$H_CzM|%w`mUOLzmRYBorrKXg;bz~JXam$#h#avCH04Tc(+jaghhj%^M&flS}1%3 zfc=hy038CNyW+>Xw94xXv7rzao6AO&7;yU$>Sm(-HBjsDQ@js);1G)ny$BG(l`SkO zUIiRJxqlQ298v@di3)GC7Q;218mud1zxB0LT(iq$6%p4JpXh4gzt~@ObD>KyY^GU- zVk-0*;34uc@}BVWT{c6ODr{)^(q!VwV(B<-);NP{r!8xoHf|irG%UAV=DF3?P#<8h z1`Eq!RBFL3v>FRI6Aqch=H7DKLoB>6#C|h<$`^*&%OL{W`)hAnS0iy2{+K7mVk4kaDjy+?e#=coI$T##N%4Q$nn_1$EGn_L z$+8AB;GpYK!`2qMhK$!jPi2@+Udf2cTkEK&>{%pw0#oj8(iE>Pd0870Hx#Q!f0Iwu z2mc5u8Ct(qSCOOzW*9KrZNT8dw2YZ(@&Q;m$_$?=GKQpNM&|YT2cIhFoRKFYbTnNanYojf?IRLEG1Ek%G<6PEKgR4j5l08oLe!y+A!2m5><_7 zIjbI3HYP{Amy}Ev=wj!!J>zcOY$?B``FNzzlT?(WW0KdkuHTdlsTjQu#(y;{V zq6({SUkwHo1k*Qq`w>M6TY4$sciA_c`afu|_tFq+lQS(y_(vaYl=y}-jS^nzrD2m# zziua@Envz;31!Yy@m0rIl4!+!G|P+h-<@d1!qZN)5_Y{4ZG*75kH#tZSDa|YLVh33 zQX;N#qGbrCUfO6cx?Cd3b1YRTr1#QboAgpIjhZ7Q^-^$;WUW)#QHD9f6HZjIq&FUD z8N!TS8uEM5nN}!7^wN;u3%xYzfH1k2A|&L}rT5k60byJpb#|`!oD>mtO}s;St8u}< z3KxLAE6%RwMlK?H8gV5;fG)Ym;`|rV#eM0KH(j{kMdB)_GGT~wL5m25e#((HH&8;f zD#hh+0Ls3Kn=H~?hZ)9y5t+-_Wxf1wNnrfOGR6nYVFNjDIhrfK1Z2+30WBtwhaD1& z0AZ1HlVGt>N(lUZY;-IBooLQO6^lZQwUhjamWJSm+VLlqh;^rc^0K@O$GG&`)~R07 zqJS{PtB0+oZj^b3{$3Dbw2+8IT#)DIUfPwvpdBLdqC7)?`X6W){!Bw#2MOm3A{=9G z6q_X#!``J%?S z*w;%N?ZwY?jyFfx-AluxbxF>&aOhrp%dnoF>r5Lh@O`vcHo=(|B)r{68^y;t(yAxYJfW%V<|dihnZDdavoIx>+5v%XhA;Pg0rmXpRRt7v3U`9N`50gHooDUI zJ=V(;@`yCg@uCXZXlLBK0>P7oj!sfG%~o767aaXm z+4KZ?Y1$vh6y-rG?7@9+yKI?U*4U#z+t1o~Prs_)reK_K6Q|1ffFnvS6*q#2)X7fu z<>CscDUt*l?#s=!BhiJ*vyX9}#u4<4`N~veT(Mf}F-s(tiU`(a0(ha$}3HaUr+DTX@SuJm| zUa-hL?SkYAF5q|CPhDW|Ne?Q9dI|PirQHkHZ24L$O@9gXqOJ1w!Z)@w*mFbyp(}dc zCR|^*XUh&d_Yx9}yEHv*Gxt+V@vxm{sAxGr`C{AY%L`APuTOsKrnz4Tp~J-$?%*fC zeWQy0^S66SXm& zHtZB5vH~nCwe=EPV6k?kHcR>r$qFFIwFY&Yt$0G#T5T2>%L-_dh-CK%(4<=G2utt? z`;Dz?rwut3j|-WoZIiNFirl}Ebq0;QY*7ESqwY?2VMO;cRj!cgZUm|BvJv}tJchda z*pt+q+ROa#*i(+WtCun>kut+Q3ouF8mOwBMKH{YB9bb8E)fafyp`xDmGLMTswM~0! 
zuNFN%t|Xv#EuP^tAR&WQg-tZRQTIqFt{x<)baeG=4cOq35AtBA*3hVmlfDUS3-^n!OzMye%=*d`(VP(?9kZHO9Y2|#n#OS^RgR6r6RNCrG*$vj=O z6;C0d9NTpB8{X9qlWI?(9ZY`VT>b~umD_MQDP~IU;-jy zRc$Z9#lHckYKDM_nJPS(6)`j*Vzw&gwk^UK5HU*yFA-$^h}6pI?EC+i4)(p>#Wv~z zJnpx9yAmhwM+XgIMjDtwgC`CeL=B8J2JTn9%5|;{Dfzn=uAC~)R-0r0t%d^9GX`&< zRpv_)>Ion2@A>jF#2)wIR_DuJ8Tezwemz@}{dRu*+xbzA+F34t`GP5Y=IZyK@ zZ)z$$slt@SpBhZ2OmE5+Z^V;DRy^!Omb_Niwx@Mi<_`PpbSFt-eLh4!vEV_TX!HU7 zT^%8a4L(s{YZJcFn*Oa_>k?~RQ?=1P{vk_b$P9dNo_VHR-grw$w! zkuN-!O*OYF>4QE9up(cG1jrsB0T5@Zk_(5hgSRQ_ee|2vT%lB6q^UpQM)~d7%{>e% z-R#4eLEu04;aI6^TA%>g>x1h!fozDL{NY0(wR7_&BF-y6_TkbHfkq9|NPs_1!g}#z zNLVlNA%ulPqo3?+hS_gP=yKBRrO|oDA+;rCWESD(>dnjMHnT1N^MiIn8dtL3o8(YzgF`fyV&qOU^5>}Qq-q8>6 zFfR{;xJTy;Z8>m_@Gl=sa)6Eb()Nh=;RfalViy0257$3mXwPEzAT`hjfq|ve9C51; z^qPS6`H~w-(dyIGMPR>tp_!z|`854XU~f4+i;wyj0(#n`zvH9+iNJ35=x_U|FOu;X z|9~`(=u>3FG*a3y`3LvpQRRkcZB{ffT`IksS7KiXPYv$O=Cgut<>4ry@?=)&&Af_e z1Bd`gA8Bsne$SI-L~$dyhhGfj_*-5$x{de-5D2rw$oNfVjI)@qBql1GNhoAY51N+` z;(p8%o;0ERxzhhcK<;9mT^@anK;P#HvrUr1wwi$F^CTe~qoIm`XY+*V5|ytc;5T`~ zR4FCCf`F&&De?aR1a_L`$l|L23C~sB0dkteTL7E=Jh)Ht>fl-b9YBy%vK8*85!|Oz zp~MOw>iIDmY*jzXqbq#SDp_ed_ynxWWB=ikwMwU^6#Y%ZxZP55=~sQwObES!tI1>k z?jz*nkkHNCzw+1(K2SMsE&Upk8%$#%$n(97-{H?*3Hu494C0)ONdgIL<}g6=33ld z8`jDFGf$Y4g+27kKGsCV#?Z~LfmqcSgrz&VzoYsPL@e_e6&<_Mn&`4!7dvj-#9bR* z#@PM9;Qn#ov!#t&6;)P6+?Lf#ONdsCEKu7GS?j7RQVi}UhAYk!LXk|X8~vOQw<=F~ zhyW_~Ss!j?o-m34xJu*G-^8@KaZlw53^4pNKF}mD&l83a;Au=}+>$)O06=`or`4VE zo!0Mh`71Yo^8 zxygvbKMR1TI!V!tT*x_+Pm75wK9AQ-3C_eh=5tqSv@kZWcYhp9o20Hv#vdrdohTyf zBdezgZ9^U7$1uiFCQL_^5M&AA9?0W!q#(l41?ZZC@_3U3l4+jnpC@KZ5Eqaq?cc;K z2}D}@1UDvZ9%D>p60#T*CcFMjlsW-#m%^0g#?W!`o_TR|Eh%nrnZvw1OaGCINqU@N zpJbknXC6yr`Xw>+Jm%rlplzljohv7tKs@zT8-B}w!*!aNUv1RG>MJ&CICVWFr5a~V z)Hg9d+bB_e$wu{S!i&C1<)V#ZVaZ87jTwx3x#@z9`e%8YjpAdzw^1L#`#p7}>AW-_ zg%8`2%M2aLj1Oj_AE-KfRsUW;>*3$^m;2Qn?$VFaw;cWgn|l4J!qqrCEP@St+sC@v z-3B@cCXKMD9H^k~9eoWK53o16WHS#XUxDIJw(Rl+Er;Rz#B1pf_4;XEOn`FbYX<&h zzm=~IGLP(!3%>3_u}H>Hyn3fQ+h2j@I-Lzra0LsPfbf>XclAY7`{6tK`Bc~8+xjP| zvctFZYpJTkH}%J0RsH*`>&9%*Uvy*q3)&%#{>S}l+=7swbq#3?A~x%?-q%*FGt@)w zUu)=;hg{KymbMeL#m0Nxmn>@Sx$1~^t`Esdzh{dKCAs%*Z9Hr^_z8tcP#H~W6-qYaXS~%_EHdb+DbO- zth;QW;Ji}aH*Qgy{RVL>%+jKW6(9_~AM?gyGg(F1(B84Iq08(ykSj2gL&H{AajYCH zPl9OzkuiZKPZ|v*5SL{Z!{qKypmehsD);wn1^eywh}qMd%P4iiyReljcU5G2FNkki zP<#toPG!aL>f|bSZiJbvSdb{_5kA}#5+LHKKHy;B?9I(G^TT}f<00u%?hx7{A2Yv4 z8qhRXxz`nc>*lI3{+oZpXTn_NF@MrL1jT;-+bH9xr1j%Qna}FhuRJsH1nCG$NLwW3 zvo5TWlo(|>U)OS|hYK|er(iiQ8X%&Wg_HQiYGObTR2rd=G7HBD{Q)2FD;$yO50E5C z8W8)2nuU52AV|hFxW?N|;DwshR;{zrcNGI86w5jIy~c-0_i^!USg2M&Ha2UQ;-XRf z$5FxQ2pz@VmFhRr!e;A8Pm5oWWn##|O9K5(hn0jF`f^;zc!e?|>!tA}1|cK_|5OHp zs&kw!#At{JQME}Q(uynvFOT`%W;S|A-)pcFls?vyOSj;=_(uFqcKHXsD5^uhAiEotv?)z z>9)3Ze7$4_0g{znT})Mu&FY^s7*P9&Z|eV}g_r-f9}MKTRhp$2eiP$PlCh5XChMh; z`a8H?G#)DxHVZJ30={eEH?Ue5_G3ttYFLhve$21g%Ee?!GQ5ga`#?zIraM44AG6Vm zi?D;d%Euh$g(Y&Tgy3E6ZOx^T!u2C!`toc^6cSw=ZMb7O_b(O#bw9G6Z2OJd%Hq5& zfB^scjkACv>HvuRZxD^P0Ob7$`0j7qCIqCl0O)r?bMyx*D+zCR$xDpvD2-^d1J#!vWs#o@gRMqDn%r*L(AEH@S27 z7QhFV1~P*?cL$%Mr_V5a-o_{G=`#_ZH}T1O^yZ8DSjh_h^Qn7cr<4%s8*Y*1S!e~E z2gi*g^C3=u4O!!HBrJdTBJE{P_`o-E)n7y)Tj>85Cv-81U>s6H&6M{fWjQ6qA4V8S zcE%i>MuHEr{th!6)wqc5C-3!?+~cfpI+Hq2|L6_QXS0Hsi4B(s&MEIE$K7>IMbWHq zE;GWY3ptllW5w2*zUa+GpvGl@R2kC830FAVD-{GH&Azpto5%_$GI2BbTn_uaH#eR% z`&CjwBwiqL2xXCGU-;06*vN-ioZq+^QdYORwQa@m9R##<-eF&9XH;L2^5cfF!p%&o zgPigh&aM`}wWlW-S>logoP?ZTO%+Oqe{;OVGlWd?i^RKkfo!2i(9O2-WCBcaEul$0 zjc{F>M4BX(%!VUev760%?Yun>YR8u)^Bwz@lnhA+I4_g~qHT*a&RxQZr|h^3iej%) 
zSE5bCX|8E_$ubwVLYk|9G3A%wclq!#>j%E>CXd=Tv>{8!Lpg}<3bh5^#2_=pN^UD(IMHO8{a&u#;exIxy%*Rqqo_mIEW%52z{?nQ;ybNzE+J7t*;~R&L*j=C}6d zE1mPD_0dL&Tpvxz&CQh!I4ly9jB!e!dqd;A{avTAe#5E>ot2A|O`{S%HuEDCSd9UVODJCw>i<^66N^O%pEH`B7 zOO)%x1dp~D(v6nSRY1W-JahEgn1@8Xlsz~%B0*P>)RsiTL?tO4Wmq;Kmr~~;@(e-< zo2yuUMoaEdN|C`^yf!x?N|zQ5#ji{!5rNOvNmh439L=sxglb4GKe(4=l$^fSzioBvh6?meVJ5rhI;Cza?eLsb)*u+@WW^ zCJt^0jE>!Z^v&PlmPPNgJtJm-yAk4v%W z=e$4P-SDrRvJ>5uch%u;xbE<&_-%E`_i){z>)fTop2yM>Y2hNthec(nslfaIqo-q< z*qY;bIB`$)HG6(R{=>#;LSN4bF;*CfXKJa(qskUr6+KA z<92K3xrU;MxDBloTWt+%a)h-xlxACku5F9d>rxeM>!ptrMcp}V<9Tghjd}L>@He_O zByOK}o(2zyiY8U(X6Fd05;^^n90)IeBVsC9O5`Y{d}OX? zj}wnwO0n6d{YKXipCiPBy9*8Jy`^h+l&F?Y0yWcH27n-hKL*?5(%8HO{aXe zB@C7;sKqG>FwmZ&!0&y@eI?BoxX=BuZQF-%zIo({>m@J>1f9Wu`2<;IeDc08ad ziZ)?Hj$@J>mV>tv#T-m~5P(_yL0D?gfs2g4ks0zJGaYw%2QoQ&W`#eqT1#zQn@%-c?e2=YKn3rPy%xM+x*b%>XdF`FDk>>6`od?ZYrfvb zJ(Dg-n~0g}bKvDD-2!b9r`F z`pq2p4TC!>H}MwU2}P#6q-v(?q^A1OBy^aZdZEdwlAU^?qn)HR<$}pkUC@b7>&**{ zeCI@=qny?VDc;vkRB^I%ypv88e%c@79d)9z6P@GLJ5lH!~?jWg$<~T(WgTN!{c(fOpBS{C&a3&A37gU(%gyjdyWu=5fCoDZc zDlQod%+?_!?};SuXp(oJdADA8%qcKGO3Ih@K(4~0P6V3Nwh-=w5lPJolbkV9#gZ|P z+QyzT#Duj;7Ds)hmgB~lT3uy?kBk`Bi}3bFFAwB`O?IhKq>#Ln7WY7JI()D<7i6;U zyhMXH*WUrq0Q?*P`=A^*+s_p5XIkcG;{5;5`lB1-{h>eNbpcWumE0d)?U39bm3Bz( zk1lGpZ~gINw$q%UaP+b7oUpWwk|tK~$n>x&U=&QZup@ff}DdIJI#)ucBnmFDwxHj7`#K|xZ_%iNp$!_f$*YQ}Y z(`*fYC-H@z)DtJO;%9Cq4WfJ{O(k3wC?;x#|q6pZflsBjYJbNE(zh}BP!iP?WGV>=I>P6 zSJ~;d3*)jQ8g-XD+I}SroqjSqVZP$KX1U6vP`&bAqI7n4{CBAnJ*4IOW)57EvNLRg zK}tRSlHoNSH6*mr4dmcN6F#ZN+$Y5smAd*KvrrM9>;mu5U&PG4T9akyz`|#@bL09@~ z+YC+i`HPSRCfatY_(`_#d#At8Lb)3=F9{dPM?!i8a#Zx@$c~cbyN7HI-)0GSvM3GI z)V;E2i=!$~=YJ3yq_+M)ervagSRuo7QDex2FdA&fG=wC(E7Ec6Q~H}2eP!z-)k8Yi;a7xZ?Ggbb-F z?S1kP<3ETQ@?RTEU*L9rD@OHDm}uG>sz}y#8JD;>$VXHfyqyWVdpV#7*c$$kCG3={ z>cu}-Rn(uV%090BgURAH6DLRh51Dt5z`FTPmSgS|I#Sqe7(y^fR0*5=&_AY#JkUVC z-iN%9!oMpase3|YANpJhy_Jl@Uislq`K{>-d@6;tIPxp&L!U?y-Bjc!!vGkD|7K>w@0kI7eqUksJxy?2H!QaGHV?p%5cGOT~py3f#$>TEmvnhgtbRQ!5uhvznvUaKfm8DXms&|Y}3SFEM z*|UJU5r2IFAtwDWOsz!@cY6SZC6T>Z*jxiPiq?>AYAt)mj+XRgb#o1lwuVkhpG9<_ z^y*$d3CQWP*fmb;=}A>k0kr1+0dTC?ZIjt053-2Ydazr$-V=12`-<>Um3FE0>;j43 zgp|?ZZL-}ZeVD%^N5Xys)9Y14V!L{Rr=`T7T#$W)L^LK7mW@fHrM)fR*(0R^tjuxJ zXxjdU3GN{MvcS-2sokopZ`Pftp(sfij0--+)TrKjO33788+nr_6Sf^l9nhdcS_GJSe~RmAAW19S1t!RAWLk<#nNFmY~k=pA`wpDH~rmwxu%k71wyqeVegg@3hKZlC(RhT7W$RCN~ zYzpU*)c!pZdBmT{GvzjWQ*JXPA1noN>QiEjQ=h_9OOzM=73v*p-jg;ecB(95^K;}T z_3tepbyu89k(#4p?fg@MmlKtZbZSq?+oOf)^BvuRilry_&PU?MwoGY${0MUgzmrTj zyU2vIXFf&*nQ#uw-$|UjMWuv@b@sj^X z&LPypf1-|&sVfNe(?3$_(Gt~3-b9}vAwT?MNOpvrjzrz{M=C!|P6xe)#NPHNI{k-y zgm>ou)tpQZk-|ClWMU;6Ppl%1H`CF0kCTt6l!q_~P-nucf0+(k()FDq3Jdz$^<}B; z__aM(ADIu4E~uwhqI+TfpCme@k8RE~JttM9Tb_P;eoznPE3WJzNrNVh@{*ngWpD&tUTAchB^SRW&U+8HVD8+g8zBu#emzMuk9CYG9^k}S29~szzQXdqb?GZdk z=i{D9cAov8n19f%Z}yK$QLuV>EWt0PvOc?eLq{aDN_(=BOp9#^)T9&0w#1P(>d=10OcsHv9g#WJt*Qvh#c7)E6L);IYY(FJ{I+>zqIcpLu`WzI^#KIWC({;u?}8 zmuAKoJJ?WlA>5!KT$B2!=yLq0p%Acad&wB02i*vS5Q|0>ND zQyh8n(=Y>M3Rxu2WB*5a@>6;ms7xX8|0YlLzp14EP3xih1=}(rZ)SkIgtMqNrK~%JSs{fsNn|2M9-W}Nt&INwW=OxIvQla98fT@Fmp+|Z);zcPS~8qN4X%Rs&D6lq#TnV=3^+`L zFGKTeWz#T4JKiOI;DIi@RQjc%46i)R46cWB>Epr7W@U^)anOzETy}EoK3h|eq6KL@ zuFp=-5S~wECKocZvq|hPG$i)-J|y-e-({*KlE6Y=^XBP}cA+&r5g&!1$lj!KDOJHF zdE1p;k%668LB4EXvOF2{Eu75~<(R7G4@_#@q6 zBYv%=Z(6Xyds=Vuce>kJ>CbXb-(F(zZg+2qrR3F(wumFBuYf0*_DJHbZUne3L*|FEgw?uFPYTlwZb=$|(A8+LnI^vW0`c~0R?N3Y!T zSG`iBpRF#welMr$aF71@80+C~eN7+}J*3hrqY=i;)B`OrW-c;to*C`1XMWpQd#zq? 
z8(Ic?=3~QxUrK`Gy`dwS@iUm{K}_zVDujK{(~5wbsoKN!`VGD<2zbRWcohN$KZhSu zhYcPXrw3yU@#nhjTgR7r;_UiuC?&q`Mi52D>43S|wECcve%p<4OZgSwDp?nzFmhf- zxJ*S#uKellBF&E3xMPNSLON6aJ^jM9MCyC7g~Y%x-^{^B)Qs2L$d=fR*svq{Fa8+m z$Mg&L5-A)1r9Dy&XZd3!`tv_VI+}jrH$-Ba?UD9zFuS@xC-EqWw9kpX=JX4}bE)6N zW8KtXQY0!c+H!KWqM55Falduql0N*$xbLT5@Izes2rMK>Tpz^!ZMEW8uI>J~$j-HN zSGvJysh7+*tU6-uB$~+FXKb|3edEa77dIrtV1NP)w^dYzFfvtcCR3%bHIX_mF7Q(H zljcfD6&&8DHq#f0YvDhJ-IYk)qci`vur-c!@3CqBTUeQgGl^6OKhF!3H8l{7P#%Fv zmRDB8Bw1qR{Qa!R$)7+JcJBWe1*^&1wyBz?dg+`W``rz~D`~y+{Qe|5mMn{8P6wr| zkMdEH_=H_}CA2|Xsu}OUMZ z?AUxGY8ds56FUN7r>?ut$z~%uY@Bg~t2X8T1$INcHwV*Uf>n8&TuV z9}>LM$xiiMyY_I7tUdH%CrOoTAIq-kK|a)z^y}RY^N|0G^nXutPG8neQqQwt&guV> zM*WTQr7P}BpZh7z?C*E~!Kyd{y@Ht@!z50p+{oiBIkI;-^0`7qJ$@?5u!E(3p@PVn zv}`XWj`AXi;#+aw8A@IbDOsn5oA0x$X&Na}{p_~w=tuwCxkim!e7{7@;9e2?RvTh< z=XaBpriSDH*?l@yYO%~q4VmO4Dq{cyyH&jnM{nr9kaS;*)urJO$m!sXbUaxn@NtwX z*l#Y1=>G_l_ym)a!>o9=3|B?AdsH35-4##kp&RR^ zu)YufO#y z=5r5b2-D#&EKba7gqdp14xDGJNo!NWg0w6vP_U)UN}%0V6Vz**VXJ2U4Rf=<`Uc3tS(*62cm++m_qt!R++A5yJ&;z1h0IL23!TB zOR)zqzUy#di89R*sXl^pf5X%` z_|-&Y8cG-^x_u!uf$!0faR|PJhZ9NZ_-+Y74l2NW%})NF1WT+-jPrLTgwT>mZIBq} z@5nL0y2LntTZX~9#5jLTf_oX~Z_03^4i$zhH?TJ(gj|+~IgL|$c>K5a&^Wcn6;e?Y zj{F_%aW9_k0Peo_xJ&{$wa3pSfOC6X0z!BTg>tmVPb7v@d;CaXr}hv5JGaLVfF12| zK9xy)oXJUNRupjGrRsCs$-o;f4ZNSxuWsny^n{gw{{YE~O(nDt)}8A)O^l||pQJ{iTQzFjHC72lj#d%RX<#J^%x4Kyv)rM+ z)8LvOVr|7TD7VbFn(|A)kfxz(-lwMUXUT&LQr^_($a^$@Mg#8*@_a)2*}qV(*-Xwn zW(CW=o2qw}%C*;>OehPba(PMR>QBlgywzK(H{$8Djz*UYMW2!2-fl+!TY{ZR^o@iN z9&r?i{#r(?ca(?zN{-T79PzY-=q-(SN`@PCa$&?TC784k_WRSixu@fW?i9?j_QODz zRD9bHln_SJKR(>ua65(QEDPeDb(VRle|-iOG<;39Wb4p;2va&0sCb{FZ`+z&k)hy` zOa;j*9~y6C$o3I9BTP!I0k78mzJ4a!gT3%Uz4UW$7(RotiHrlm^8_n>~F z;lcsnN7?ehf({G?m@2QLNLg zD|Xse!STS@#Q z)Zslrp`XMkE8ykMpu61QRCL0B5p%m*jdbgm8EO}l8H%@yKo~8x8y$12-o6@P>!8$^aoNg z%H?9Tf>|#bS5~DXBU0Cq&yc=hocpdJBorTEbN?le?^1LK7&^Ty9~*|HD6(&)ghYhi zZ_SSQ)B(I_JZQ^m#$V42@!tahuG;&kv)(|nT2 z;r}U7aF73>>T$Q%zrkZbpXm4pyI;`c)hNp+D*j!rWX1&(ySE@>*U+EXHTW^ReS-q` z_@l!gbmfoQB*{`usb(k7+omiZsn{_7m`)$ANghYW%P|}GT8e$VyrSVMQ-l?9(s)@* z#*4-=UN)qNYc$;Y6vuce*Kn@@AmhcI2Cyat<7K0Udl`Q*KGtde1u;sAv2flNXw0j+ zK1Mz7U*0oy+g0p2TWoZoao9h9ALF(XX9F7cJA_zSg_nrM&un3d6>nf?-3*VzGVLsAv$5K$m(c7I&-VmIG& zX{)cLHN;j?Xd8l6Kx=c;UJ59c#I}lvCTXjvp+dU2RQe!o1Z<@k+p4(acjgArzVGLK zzn|Y9;>|2)&YU@O=FFKhXQahX1IFBWJZzfFhNcbu2H+3DOwc57cX;LTD=V(ehaQYt z1pfVpgls$NL;S4wuFi)(zyoh^_Iw6Pt*?OBjNz>K(_wj581Fmg{b1QW@kOr#@dfjt zFdm)KR^6B5euW!rd^DGjhwsad3WxCi8Txu^C;WR_)zH_ufJF2=cW%w!JylFZA{ZEb~aL^^~IVFjCi#Y+~0# zfgyjK|9zW|XoTm*c=ji*)$v#p(^wyb^NrI$QHJJTx?|H1HHVKk?hD#{V`$xkymPl< zkk8*{&SjXXWkU-N+y$V~x9)MJPp*J{^ZK&?uy2|j84&Vig%mdVe=%_WG_+QrHHvM$ zjrNwL<2JerkiN{^vkdM69L@N6_@=}E;x2&ghh-g~WZ;3M^){M3+&Y9{yFbEVN%OH~ z`7=4y)$m}sDm5|ef+#9HA}V@()DvM*&xb_48U((N;j*Y|f6!{exob$~%$&pZCMi6> zFwEB7OLM?ReBr`n5VbzTu%{O54NzG>f;Xh#X)B<`$pDha86sgK3(35}@JGu! 
zrsvdu2pHLo-V@c6F8t&Q#7^uPdv#h)@${U*Yw$YjwoNI26Tv%-TRwt|KS%c3h@1-t zz_yH#Za&rQ^Bb{+Ybqgbq!n%qtH7a@dzTLose(DdA}<!@%VYBAOQqb70PF+XM)d^KJ{%({4>sy)^o;s2jKer5Tf~OpG#uERnJo0a_5_7q+ z-UdH*&b3w2n(*Ku`G4qJ3bp1blrc+MIMMYuyaGi%WwnK`kU%YxcO?yMoZs~h|Q zp-7vEUg&kKbL4PSA9I(z*T&iD@CoG4x68KsPJkgY&rcBa4y~h9ZVpgLl_`XqbWGAR zh5pmEQSZQ&D7dmY0N3c&D~itRxma#Rz1sCbtGQc_S%vTTWI~u`A*!7~z6}>Xi$gOY zbWMJe0*@LCza|a;N}dA0f`)%>o^@YIRF{4mU=?GUy6kTX71yJlIIT^Hur1Z<1>{!e6weWH$jIR zwT#m}CXeW8oDL_uU(_i_@?gYwB6(1G8=uWQod+P1qeTJwNO&yDM>9_MEam1k`_66K zV|nJAd2oa_e`HRNT+RWvrT;iW8`2?vHDAda7Q*Hm8Se1mc@4NtH}{O>fj`Lj5!@he zp1JE@UMW3xB#&5?qrQN!jGL^LDYq;a=Z*Fvk;rLGK;JdrI3PhF^Dmq2{Fby zwD6kiy}05Xs2vI%tLlQ4YGFyRfx!UZ@)}fyG3NQAg6W>?aEnLi5UUDRpor7K5S&`G z8x>5T+lSlQo=0uZOEHMn8qbP(*h)hV4#F5N2!ht&h8QVe@^RntlD~LwUmi}TfA!)5 zO;`>@&w*S}C4TXeecp?g*?w<|LA?HXc-v&cz)FM6@A*IE@yjaC7C9#j6uID^XQ)lk zY2Z5GV$fdy^hQX@YhKdrExW*t%wl}5`*&___Q7@>qm{936BCC9SXV?$i7X&GIxNdnB zI}KynmX;@?B9_SNqB00|0qF@|s3_zi00-Y@EgItZ`;v2c}p?j_v{(2(cG5UgCEC*5RVjqSy*Ye_ytEh;RD`JNt zzH98Lh=qL=axBY>i~If?l zxI)^|T-fP=L+&X_`H>6&3@;u`mo*NBr_!4C{~M$fN`8(LOg{dK%|v&|-N#2@tN)Fv zuHF9u_hvz{J}=lf@@B#QjrYlxd-xf0r_1oCSo=K~yAg~RH^faYZs2RY1tnfV^3mKZDzi+kRv0Iv04rQDBGZa$ z4jt5>k2cR_a5N@A8>TTwUgo)6y6N8BQ&Xsoq8(Y2cPV0p`ME&@%h7w!gIDC?a%PNY#|L+w5hc!hU?69mU6$%vxd&NE2Nob-ZtgI+A;3e8O(GT59c*zIh zzUCiXy@yBYb@}RXt^F@RAQ&@a5DaDH<Ioov920l_@D!r4kMcn4P}1ka%LqT(WkE zBtx_#aw8zz3e8BENg%=410%cv<1fxdJjZ@8ltw|EKF&8(6AYV1?hbS~JsREvgG;j^ z7~@SJ3bwpk!rV^hmaG8xE(vop9W{*yknru7YiO46&A*xjQrl`%i_ zqfKlOhCjguV~i&UaDudwG236vz&c6YLf)(FfzW zFOA-OUz$CKGfRDMI;JCvhjizJLG$DD+6 z;@H~!eh^TXwswDz+u-JYdJe$!sUkyTo9g37Y6KqCP0HcqLH#AVM6 zB_iIOoMV1+DUkW;dudMtlrRxQk|?^4hp!TQrfR%k26`B||IS!)u6}g}c6P&{ZOwE!Y2ZwO%GPAlrwktO%PXs$eMv`ORnhw!W zGbNj^dl@mzDCixTDG~qfW%z*IYJ3TqA=y0OJKN_oceA+z4EIt_F$R3$wd=uba9|j( zO09q=-WBLwcye_OJTkcv8Mjs6SHyPeof9zkVoA{;7N!EvgFeOx7$~{WBBzUfQ~bRG zIg$8a_53HSDF_yWagezF;bmB7qXnKnp0TD8@Qht=bdJSXmBcxgXFi!-%j3?1-dVyf7-{4rV|2t?eJGoYt|*@7Q2wuN zH2s%0=s8>)mf<#DUP90Qf3^WM7Ev|XYkp0KO^aNWR)L9pWRC5Ws=njM`#!dE#`LpO zrkM*f7&t`3jjYiP=2mIZZRF-5$9`a}ow4nJK15`(LL_DkD{3n0SL~_aB9?NxNYrRN zN-2T3lu8 z;UTHbA9xvT8_x=!C@k24eH=A?Wz00@)$Dly2VQ`WmM|NDe?W?Mirv#B%q!Wj^e&M2 zsv@LJ@}!vD_3s@!4oB87Z-(a%;F6z>WH=o)GCzy_x9BD?E3?h-EWsfNvwO0HS)Oe! zg&(lwPUcV^*`q=I$Fe!jrj^5UluP6KM~uK^dEgcXLc<>qFix#tG}($DHqnzH8~TpK z0~40PM&2h&n8n#<>td)9;Es?mi?Yov+z%3dEX?*9xoJ6q4)vJ@+2+q68}`SY0{3_c z+_#^bZ8k#+Ii8d6aMU&zdN^*l{KMJiI>==sBoHC>s4pS`zyZ8tfoza5W(04r8!R!` zYlhcTcw!@DB#&}v?hE!)ba0Ujd_YPf{R6Jx`tl%0e_sxL)BoOjhw#MBwfDf@B0XG1=mTUh_hq>MxUW5sxlHv)-$;?f zEMCm)%!1Pn)K~JF|9q)=JN!J1eww|h02PZ2hVaTmeK8NF;&?1<-^)gTu$TQ~+y5~C zp30OA!=d|zaRI*Gl*O&BWbbg=Ol=m>Or4rGHN>aYPlGE9fH#!=R~FK=Y4->XAnoNW zi}5ZVLH;~kAJW$NV7guRrM>7Y$9CVB_G}g~1-_f^x-Tt%2%mosr@`hJd~d?tx4k>! 
z;IN6TgcA+=hIhvophUw#GaQ88^ol`-NXZ*e49>O6d#!5Y0I~ z3&|b!yg$^s23BikF_(b9&7Sjz*GGM0$|Pr@Sq{SNtUr?k)nPfEH-rA(zdRue_uo1) z510j6=AV!hx!_MrAb|D=PsqWFv48&-6Ij!0JaG5^pmYrLP?q_jETc(xxSm}TK*VP$ zqV?cG`px2@G2_U`{4Iqxo9sN8i8`{iHC zc{JuE1g~YJoC=le*O2zVl;ik`O+|Bo;%}&LSFis77_+A?Rr0Yu)*np|aF8w{?$x>&_}>uLz&2v7EDQ8Yd+9005j*6kmq%|(X+u@51s6}O@a zOa}0gwW7-R6OC&(PklfGKOwRkSTDHrA=^ZIL2%dF;JocZ^*#-;PqR1~4vwPNgX@9v zi$`X~jnVNj17E1L-_ZI@(#Vm)(x?35vA8c(CO&5I<#8cp>-^HzL*9e|^)3yu%Li%m zuP6>bK1Lml`WiQRXOYftxkwQobexA3Rf0KZ*sOsp06y8tXWrJJIY*ng;Xpiz-#Ta* z4NAnO0A>rgSU|9Kqd(F&knV%>|4az+IXx6>;Kg>6(Y?o)J#6=7ieR719^evDd1iwK zqLN!ieiyZF(T!qW(x62h)Dt+HL%^Xnl>Iz`I{*RpT%|#a2wCN;#^EJKnoaW4{IEEo zWxsU|t+}Wy@+6C~q8fDE8qRrN$L^yc59$6Mt< zS@*$RGna!37mj_%%CTMGDjvJ<kF&zNlo&rcgM^xs?eW>=YcXpI7RB)i?gYT+lWe;qgUVcS0%L>=`bF zzs7DDDRm!hQ-B5u!Zm%Fmi^fO$O|t+#t04W;{|S;>|=(t_s|^adnUkj3h`fyo>}uv z=EZ~}&t}98_^*h~CSdP#Bllg=GsBSQm=JNL7|H19#V&)wH;R5xn0IBH~y ze3Hp+Ukit($S~|tjCTo|((wKY?W^vaTg-=<<{wkYSBK>;4hxKA$Ez6e{!&G>bHV_( zLP8sCiGOibCQ+3Mn!Zm?Fksq(Cs_vZI-)6<>0KHj)0Oje8nOEZ?zD}oqnb3Xa%ri# zR_4+Kwgw!MpPNAU;B`z>xxIX}#WvNh39FSIl9PYHWl*{muQ?2dKCTXuB@cz>w=iMf~bX-$!IK%eSTNRR>>mRV8CFgUFqJw7TSJZh~x z$`BEylQ85GmUo%k8D+ulXf*s&!oN89m*A^MVUL(_>1^I37Iw?s+R^kaZ{_Hv(x_>n zQK@0NlQ+X|1ei|#8MYW=Z!K}(?!#s1a)jKm9Q@Yj-W2n|rb#E?+`v<;H*Q!4L6ZE~ z!@nr%jkVG6c-<=LV+D{TVw;AN{(yVBjxQR-Eq)M%K}0XPxfyyk^o#ua%|pC4abeoS zkF8w<5tzW8;PYQzm%`060sGl6tE7sL0VtfCr3ODfvyM=FZmf-Q4*;nj6P|0TBgdZF30uknJc6jla4;m{L}1iscMbKM`w@LNlzu@5x|0iy3ZbralE$b!Es6w6=!~j%>M5d+Mzz4j1Qo z5|_i}BUcZ6%%7GpC=6n&AoK?I7Nadxz|Vr58=NzN#XevN9mM1+jAh^rJjgNl97F&f zGTbvCWVol?*C)xZFPp(m<*eVvG=9Dm|@-pX>{0qY0qYucR(6B=DxHQ8Rl|GbB7G4aeePx><+uH zpU}pW8Qe-j2M?Ds$0PhS8Fc79{LIs#3(S%XGUQ$wXERC3a0lN9Pt`BbaHNZ)-%{v* z2bvgY&{YxLbhJ*Dk5UTgO)3>iNuZG@!mtiEC&Nr5eEmo8px`I-aE4oUuT2wQsN;)p zi6H?qB=8Ig0#Is=;8P%66k`HCSeF1=u3DEMqQ}5(H=wIKy=$q|UEUQIEiB9lXCgBY zQ;;Hmbfo#TlnG0rg|O8XOR>VFa3&%Hoea?ee{`S-xua58KIDSr14ED&kDsB1Bua*AFzV!pR}@s=mTGsm$tRb4oIPgud&C;lyopI~gL zh@D@N`E-R99&4u1Xwa4yfj#7IXb!3 zUGt^xH@m`K6%{6q!bMJiC~PkjwtpuCy$lx@X3G-zm?}@GS|~&a!!z_yMT}q(A`RW|SD1+70tA=eepa0J~qk+%wmg8u&HoCtL&WA z+MVCtk>4{zbW+;O?}IH#T60?c_b*~M_?h0=oBTB8%IdS38sgFzBUhzCa7ajBzmEJe z3lpzeh#)%=>?B6Jh>#n_wdHj@lW}(?Qx#ld-b&D4_+lwj7#0nsnjT-_9Owhn1h_ z{&Q4|^inwg_=DJ`Fn)482ADL64$Msxe9(-%GFP{z1GPx1D5hcQ+a~r^Z+{( zH)#^shx_awDo%vYaWcQRU$BPVk+>Lr*JW$iM5%{S4Gdbt{^qY^rfauZ!z%p@#=)K8 z>nz6BgW;=)ndvxeHPVmsFd{Y`p7xGS2e|YcScus=*VBNHFya*bVqc3$6`yUlZQSM1 z#z%sC*k-^W$#VJ@JsDprH5y-H^{xiK#STtt_G&Dqe%3yrTzRnCGw)OjA2bpcMi`mG ziSYD@GV}QK8Kr9 zU#U#Qdl+A3G@2olcUU-b5T4t{BgbEkBMh~?;=K#YUtMS{vFgFyuf(a1Pu%g7zlGRt zCn^paq>dsJQEnmhtO=iY0s10fD}V3g+5WN6`jf6TpsRPCyN}UV|zF*NMT~H=*#d_xp$PoklGl=vG|T5!($!#TG-5 z<2)l-$Y%C&O>cKs<0fK&9_sdEmSa2rLi&JLWQihn*Dx7?)^UeX}`-2 zJllf{%?~c@mEH(1t?&A`^@3xt|8n!k2d*^mTtu*r2=Nf3okUQhH@mSj(OQyi?N7A! 
ze%Sc$58HpRmUgy(OLQHvmNqwjO0=K0UT7`}uaq{HSWi{y-%HfKm8g9)TfaTKO1c$b ze|(_(9@Xfe%d$eb5)cEF@^<`*=CiOtjK{R`lvJ_%PT!@U(__EYueIIAN699;I{2=y zZ!0NnkkVKiSQ8ony##$*pxccZVm4Y%-`4BR`Vw7?V_W^%-F=*6)l}dgF=7_8b9R=OcRbVmk+T#3e{bwsYcmQDE)n%6V zU*9{XW;8rIpu)B=TOL>>lX) zG&lmsWU3ZkCYosvy5E<CflX(~xuEKJe3m2T!?C;yI;2S)|taf8Q9_XTG zqpS}Y{f$HqHA~#+wGa=(Q0(`OlUI@)pSw0$=(rujsTa1{=atnK;vtFjA!aEagc zCYO01F6PbW;liQ-knv8T!6`6M<8+cD&=5@2c!T71@V_+K@=X*0?%FzPB32ctZMJGV ztxhW3LdgfJf(_Iptu|jrDIiK@&f4z;@Fu0-8{~7nc+4tk)^`Gv=neYWUU`z8APjzQ z=tOULh>6BaRwpsZLMWO9U7gl}yRbIIraT*v@|?&($Twq{fe5b>oR2&oe>3D}V%!(w zPE?p)M>sZkOj_y%c&OsB9fOzC-jcs(pdxhC1Xtjh)>(E+IX_VD0A{aw5ihBkRqH2< zw>t)p-}0WAA*=lS1;@@0h4OguZEx&RtA&b;dMm_4kx>gFj5xTpL%@D|$EeNz7^K;W z2s<^|mux2{ySKxDOm-0y~7M=-0xiW=6Q7$;9M z5hNh_LjB<7s3iXJb1)j?@g+K996rkAgbDJ7XHk)7(d+aEA-A{Y@e-)uEQmW!g(#`8 z0wQdNpg~M-8U?S7%D`#Y-$0DU?}BQVK!nJzo$r`0mM6&_0}pJHSGj^!`fZ!=QP#to znz~hko3>u{P(dDoz=fDMco6t3G~$6QA{2ZsP^gF8pqdo^@m*NhS5fPo{Np>}l|u;F zxmn`*MyNYubomPm{(NnJu)bem@aMq?Ttk3w_|Wz%bY~i!z5<=W`cAGuCs&|Tq3sOT zc0vKG-B-X7taoq)99#j1LhA_DI-o$i#aEy?Sl_IGmk*&8)CBjfZB~R0hRC*sjSDfH zx)lZolMwCZZQKRXZXQ4vIl`AmRp-NviSQ?*q63*XQ|R-(l^yVAm0X;Jm8YiQlkI1^ zEU+N}Y1JIu&Vh8|JUfmJeV-Wsr&)V|z!ks;bl|E9{B;HJz_jf9hR2#V&&0YvOIiJ` zv8&VQoUuM1{&ouOZC%c`0f6k{Aa^w@TsRM!0LbtSAOBCV<;oN>IKb^mS-st8tTGy= zZOMnf=Y_hHM#ITQkH5eKRQCphCocRoaU>b~mO#2fClOfB=zRM}_-m5z;QkHlCE*)3 zUX6!z@Ir87i_ZGB?)cB%mVh?&LU2{bjjJvqsHEG3_5&f>y%k;OD=eT%P-6y;R5aNx zE^QNPFGOm8iA1(Ccl#(jJmP`lAsD+~R;O;#)Y{1XGO#u_t*N6dwxC*>&(KH;2kMHO z%1y)=h&{Pq)+Th>6b9Q=Fg}_Qm>ZJH@m0Y$dFKLqCz56!0x4gg7N1-IuWhDu|dTl;7PKB*ww$8-Yt&UH`{lu_sl;YR*>l5o{qJ}{I z{6oTh_<=`wTyp8!`BwtFuNTlzyZ?lR_=Q55 zGLQcUx2MNNhHq8##_2yG3@69wOA^Z$q-k@r+usVVUQ}9V%xyFYv9il80_;?zQ^;$8 z5c-ksWusz}mTh+$V{ZIl1_It@W`1$*ngTQr9~{sq7-B- zim(HlZ)AZ9Y!HFh#=PsW5Po)f$@9hxj#gU=HK~UHte%(Er zD-B!Pe7jSScBItdl)Z6YDu-<=JUx_P%U^5D#3gtlEJLtPPXga89Wl{B5uBQnh{x0l z;f+{0d5xL(JT2}uW?6T96?|z7fAbQI$x6Wa=06wHego*{z^9uxVEjxMWHQ!D zl^^yhVw{S49g7V_KT$I#%*Y03)?4X?g7T|s*u1Y^J+pczvPwvBzXW39%sw^lzsg@< zk`22SXho$r`#mRy+c{!-+)wCXD%jyfn24ZQ0C?-SLsw1IgxI}0_uy}>-7}=I`*if+ zZ|6EzNn;yy?9JbLZA+!GpXkV&zkz6v@DLO5rOWp7@Tc)dmzj810`M8jYIvQ2%Rb_@ z1^~2L*|1oeKrO6**j46wEF`3Pv-8vgYw+?Df&{N&bUGgBd`t_cB8zW zz7|+OOkGRN!1*nHfF}f=Lg;eUW%Q4}dc$O}*c5hKke!OLAOdiGZ^h})01Z3zJEj$9 zk4g{~Z~%b_aL$iy=Wv8i5&NAaEjMjFFv2haoJ5QZ2*Bxi)14dv48#{0CLT+ z?~*I^?PkAKqF$NY@%%s1nhO)aj2~wlOfa^Zj7AR;UGP1x;JDW4wdOxdSPPwDzXj@u zXjO?d?6E)t5gl6vVdDZ!M6`v`9}Euwd&JprKP5H!eZE+L89r&5x5q`11}X|4ANDUv zO;{vW6HJd4m}jf)1Zg0mjQe!9H`K;@P+K`i)&~tLsN?5AgDm8w4H$ zA1!!EFdp7h%WIHT@_OocGo)=|=XY9@@mLF=tp-+B5|hE$Hrhl?Gv7@GWQ?r!;}|eF zxh`nI0B88*lz4hD`eT>^SMSUQ3ssHOSGR1+11zsSsA$TonfuS)yjMZpD@+=|1N*vfbeDNyC7BEH zt$32+$q8FKVB>4}u(zs%lt^Ef!eVUsP;oFk1V06Ds4scZdP>*ZJ#aC&tF7W>M{9@b zn_yf9R)Y2mgp(R=b?8rCg|)k^rS>YF+>2EF5P(+O!7l985uoD*Z`m(}ISVKi1s(^R z9r}c4%K`H^_)G@XNQl(Feay#xPw9{mA#|$X!nbfx@9Y$$by@=62#fQp8U0vP^XOjD z_wVq!2J)xu_meflEilW{H;QgsPsPH!naN^oUSQ8Iyt3Ovgy7@!H!3_-$Z#QdT@M~H zO_Ua&mJ8N|BwuTJmu}>({QdYSY~Jk?asaIu$gi0==K3do^8Wx04~NeS98T=nW$x{^ zLlYB#7|k zwjxCw>J@*7YU#!)bsCI)tQ*$cuOv#R*lyGscs#I-##VtQW<;+NAJSW^*5OO^C3<7& zCc{UViNTC}m+MQG!v?D016{!$W^aawjFMy#qnuCPC-bNK)Abk}ffnLHT zuvQKdB#`p}5;@)n=>>8rg7g}hiy*xQxltS>_@cL>imwe-gakH)OCXPMkWh(P0ErFt zL3)i{bPLRPxp|^1Xe^5Zl$4#hyohsITiYR6dm@uuEjCe=Fg_puofOLI)-+Kj=D`S= zp&U18f*?2(jyExNW~g8)4>f7VF{6{oRbq3MdOkd6$c##+Ulg0KsBnjgDvP7a8EG>4 zg4i61a(`v~lIiEg=H3N(*-~j7)y%XlqMsAbmnKl3vETNv4YE1)?wiuOj;3ytaRq%- z>a@+XP(gK9Ew%@Tm1{cvHxm_Np+?)wCtGYw?3$_Q93KKLaj9WXJ>e{$?$k`B|A2(C z7HXW+wxl({VteY`1g_lT-z=~bFMkl(s;yFsZS_*A-S(2n_9A)4i_1(Wf)Xgb)3yP} 
zPX?TvU^gk)cLfo1?Z%RVv;2bNu?=GO9YN78_?>W8knp9oLCC%>aJNX{H43Z5LzJ1Q zy*fgUj}Chl#{sQ{8r^6 zY;hH2=~6KrQLk-|v^)kDfv@FN2I}j=rc<(AhNrOI4Hg1y2#wmUkzk9koCZC{Y9Wr@ z>bwA&;jtKpyx&RG$ACrm?HJg$*GHxEH^V#k6X0kc6y89jksfnVR!|spM1;IL%sg)R+=(~$Jg!bz*Eq5fP5HeVWC=pC>&Mm zhlxV=^|m3RxQKdyh=_vgh!)cx%=8%6A`N>AmRT54AOa>)S&{K`nt}KXI=l0c|C1)q z{5P6RJsQ)mBY-gSPQzosFD<)qJ7Lu}MK&I`0wtOcd+n6fKpc$$3bw=m3mpZrBm-Hf zV@|-FXfhEN7(iLQyt&j-3cLcnX|hmlCd!gmf7MCsmr`}LnA=Y{tLpC@kew2nya zpOH@DV;51Y?F0^pq(#@{aK;D+M}-O7&|xeceaA#?0MIxZ zg@HI#Gd?Ab_^Rj*M7yO<<5Eut4-YK9xu7Rk);em(4D925f(E=ZFwHCPFHG;lK?I!g z5GU=#>GOid_l;X65NX0;r%n&X_w4?{L^$lk7f#|Uxg!lI{FR;h;zac>z=56>NWL3r zqTZz64YUv{6ZKZ($AsJ3Zzllr&DM_z(h74s!9;Co|CpeU^}C2f6IIgnF~J_~w-9gI ziMO1@W*4!gj^`mtre9{Sc?=!!c`A$QxHO3`{wJoiZGdm+C+x$(9Vy5}9d}YEEYxW` z^<|d>Z0IL_M7vl56tWOsnh3kK*=6tG+2J~Kv+6Ae`Z@D0D#(1 z7f1pSYr=CE2^62|K~^4Q7d8kj#2Ncpf%7ci*%oVeSPy@C>hy(XutC|Wui>U9a4KEf z3k-}$IEk-a9X!|9v0XnK(cy!GsSCsd8ice+k_Hid>d-K=3(zo|PANzNhzre~sF6Q6 zgh0Fz+TD(j-^pe+2t6l)48$481E9_hp5-hb&;SH1r(*#V*VhW|n-E{5dONh%{;ly` zKyTo{P2=Gv!0A*Kh^4+!V64I;u<-~H@{T`3VULbD;~`q*xtU&T8qCKtdHV-Uh)2G3 z5e_GD!cLqt5T`A~aU|+bpK89`c^T&Em)z(aH&$8mQZM17<4y!+5go94`4%i#!|12G zxlXCvIo&MP&9U>$p?)a1)JtF}BC1Tp3p+{v=2)Jc zIAC{N4cndOB#wEAhARQrXYh|lga5pXdJzt%!5VDaV4&)CRE7MGhx(-G1~9g>P3@%i z+fOR&)aQniFBzy$w@PZ#_~LBLNi^7rV=m&r+3Eb`3=9sA;OwibOPH4q^1a^k0d}3_ ziyy_%fp`WF5AYD5xQPA5q6LU659+8Rx(_h>9t^D(Tc(E?h$D>-W30p2itpSfa#9EF z)Dd~-+3NG+1fGSmU7E;0p2f2e1`}a>9#$a+-67uETXe@o7%T*{7|hWxL%+XUsw&j#&;$wa*Z1>q-@flC?agJA5$EAW%8A9N8LoW#FO#H)7VQwwp> zvIMJ@$~v9TVD!d*6Sdw#d~PE2hh&Xl%6S8Z23DN)PU7V|Qaiy+zW{RuY`x?G7#$l> z7RZe(`N!nVc4EJQ_ykl87xAfw_*@Qlici1uL15vV54d}$;U>!kidG`$40x6zDb}=+ZbXr%Q&l zSyl}`4G-7}qmy{aWn9tK>AV0_d;8hx;T~e!#S9&>)o}w(;bm2MC&6^~55#_}F~AF7 ziU$_KY+nVMq?7ukT3ouq?F-+BXoNdx#Ho#HTW^ydmo2I_Mi z^{I#YATQ%L3vs|ntnx9zP*0kHL&{33u22cG;VDfjW@YT!wBiH|4O z08hHpd_1`pc#?!;NhI3V0Z-D40sVj4iPcVGjf+@|cyitJL2{#Kh#T*A4RPRmz=6Z; zN5Afo<;ru#j=S@q$L|1voW#2@Anw;Z^yt7<5VRoBPAqy_3hmNK&~EBdDKyI_LbHqK zOQBVFLcfPdn;{*50tx+h_*VpA<`&iQTEx>Y)9Yd4?&HL&`xVcyUPUK@4)r0oflYv~ zkC0wPqAwQdRkV6YuX3w~^eS34q*u8YaC+5;|5dO0$VIR|y~_Ph&po}$L~MfP^Ym}H zl>8NFz&D+$ZIQM~JEdgtCD6HWJ9W@S9Mr-1**1F4s7w4C`;7zruz`S+TD@xz=A!-u zDK5$oi~hda8>sbKd;Z2hzW;8`#9$9`0C|Y&7&!IuPzRFF-36gqGl{>s9-h-NP!*eN z?;^WCy{^kdO#;1cP1k5ju_hio&NT5069~n#;Mm+EX4`sN#C7n3X@h{x@dpnwxXSvo zCT5|DP{`v=ry@+0qA;=DWLs^qy=1p-Ag|ozJiTx)00T(GJxKJ7-WGAIaE8?A2m-g@ z41WZk{IbVr4+3MVfL$;Iy-u;c0Z=S5(bn{|E_xPwLDes@sC86tVlr74pJjv5u87&nMorR(>} zO2n12lcIk~tHfJn$eZqLwHxfUg|7z|KF|>r8f5zNZry-j;QK!! zeETFmPq~@Tsj_UUIS<+e;3at^0-g4@LGX-T@IH}(>xv&Og9D`oF*qlY*CbF#uC!1y z>qO4-0%tD2H1|ZD166_jzTKfAfAcy8*jh>-wiIGqX4y6tfW~_~DC;Xtt%&?pGPF*> zQqZQi*v1W`LaO`Ia^=c8F)S7K@~`cAd}yY$@I>561Zf|b!~wyz9ef|KwI__N;&Wk~ zbgiW4_t*JZsI5-hR96STHQ?L?*PhX?W&A^9$#arAzgE$?F|ZOsmTONKm+bytQkq*@ z=<)UAZCwYy&QDhvu3N@0U4K1}Z4s^|^!5(E-C=C-g9svk2;gue0_We~ey#XWSgYT; zvF;8@>G~m(yo*4Ib(Ir!d3=QXg%%+w8d!vi6W>9g-Z~hWgPaXS_KkJ75MB_LuHu7- z-3$SJK~hH)P!&zV8nMby&VM3XC&COGam~1?1%v_jYZaH8&-pHGd@b>aF!}oLtFhLw z(vDtnAJ}S23$IW4p@67hjPuMh=CkJ{cALU!o6K>ExP`APm+CZuS?GcMDvo*j~9_7+R4`~5kNNU3Qo9ALCqQYs*Y~e*%xgfFRDG1LKS!^pA|``e@z>hU^3 z3*S|40dTHt{<#qHZU9PtvQ7X^d&)oeWbd1;l7av0sTqOQE?hrn93}~+a@H? 
z^%l)>J19eNplQ_7&+uWWEkrq5gY{f6%e=)`&~VJ3XFfR(f-}Ju)k3|+wGeb4GJ?hZ%he02E2@fNlUOr;2`)UeCTj=(oEXkHcTcEQpN$qB zB>9CiWbGY<9G%}kEjvdcpeTgMSP;E!e&jf~%G~CM63GevCB>zM;7}q@+A{f5)E|bw z3J@%wx}ZhC#njYM3rTCwQmKJhSU{!hlf2F=uqiU@8tw1#4~a^18yjcZ9Rv0}%(4uN zDU4bF@q6w9A_c*=gL~w znI(C}v=cK?Nf5Kfq}c#YUX~qLoAgw(iAX}qE|rK~U@WJvS}2vaAri{5`I03QJr}eC zSM#rbk_Dx}dP4tQ(!FIaE_-oz@mq797qlNweZIXtzv!lcdQ?0fD;y8{>;mJ53KO-U zO<*F_79xRt-VYW@C87cz0+@owhMW%%F&!t-F=0Rz|GdafB)W(s*D?$UmREJlSI_mk#H!otV6bAJsx9EyiTya~Zt_mzju>4=@v z{bhnq`4e|a2@jzKwZjFA5{0hdC-)eB1CRZ6kn9~a8J+A~mlCL1_!19KY~cxA#=B0Q z2p6pd9un(pnFgo06!y{c4AflkvMit$tfl5;QS-nBy|ufe)AQ7X0n< z23bMdREw=`3Vc*-+a|kOz5!tnDLf@ zC-(3pI^(-qZ%|`ZkU=dT7+B#^OWbE}1G?RVWanTD^o_e{P{K-Jacs5cUA%QJA@6Nq zmwEKL?_mg@vaEscA?9}fP38J$IT>-yt;NG$g%eJY<{I|LK|Akz;0UW- z2>wd~;%lbpQo@X#r%0wdmWQI8~piX(^+iuZYOVFa+lBXDUz0DE#f?>ik&XyART z)rIOm4Yk@JfSDQ@F^*L1tQ>u?dX$TpUa0%6izjmOgk9aj zcBpF)ZU0^k{y(6>tBzLW&BGf9t$F{Jagl6rNf`K(@^l2q8f2zl!2TStR0`vh7cOkH zgo3Z{1HEPIEVdwbr@t5$Mq3cw>2D!s8;Cg&cN7%Uox85tdGMOUwZ#b$?)YZ+#iP8W4t@i8KiIBWOWIqFr%8Gll* zpKRQ<2`FH_84Lt;*;Z?{IfMOHOQ?s}0jL>&2nP1FI)a_XGnhB><^$~|K^{b`b}O)~ zT4?8eqvJWjr|JBo;D>!O5d25CPP7~E=y=~*A>N}_2!5n!o1+$8ynsv5iE=0%(Ob41 zPOc)#*mi$F0)67{GyeE^J2Br$Jc6vo^UqG_;bF-id2xyO!yz5rv3BNlHE~0o%)`*f zbv0~FUXr394(_$m-O>2?vXh`s^XPVPUuofyUkAWOO(KQ}7cf6HSJaC{gmFPl=veED z;OFTQa?wL&Q_Fsio}Ben5JV*1jA2CO0;qsAJ1Ea|t8v-9CGH=1zTZ@*`Z2NfvkV z;2|C^ayR0uq-%N~ZQyV4BOk(ktvB(py5hp{sX9InqG3F^bqzh>ty~lKxGXGMX0|-q zSfw*ot*b4r3Po~RYDxeealGXyjEaU$7Zt{n%=>Eg8EIHJ zTyHxr!|;hNwG`sjUj5+FfkC@RE!)em(HI^w8+5`3|K zPQ~%-UUI;jSAAzMgU49ZQg#Z4$3rk>qD);9HA1*NXOjSU>N zQ)8;+))IKC68iD8*UrOax5NHDR_C}IwqYzT+1*#!tzM$88J|=;xizWm)v~7$lXl@) z!k0m_wqF*@u<|5zN20n?(h^WJKGfP5e7W<6yd>tt2mHKN(D;JR?)nYKM{1d1^SsY; z7VLYsSqtykhwR3?E?a~%*ng48nU1xeQ#;dn++RNTS76~qsEi;|QFKq3orlIi`25C3 z;eapfh0Uans9a;v_ybt>%|TK-$V^nbAC=;=mD`2*MB~wlBXgg%G2_+XK!r~lNQzAr zxvK&IAcSx+@Dv91gvQSiWQ$R2G_HUTfLF3cx9kmgw>qpI1g(-R9`x9zyMlvV8CX}Z z+LgiM{_?rMfZcQ_Z-!H?Xtcqjnt<2Ihg!)dvJqrsTSeLKrZLPd6*TSE2f%3;JeCkn z9`jxuRNa{5co)X|LtbqMx%T_wKUF1Db*;Da&h@}LBV)tS6HDX8jJGu{*^Jg`m>%wv zUiBr_d~ok)|0bO^C#39)=y1r$0QirV<~)2SP#lp^=# z9_RLrus`xES~3S~j)Yx2GPWgMojnt-aiyyBK&6Ds|5W8H&5}pK`ERFCo|JhZDF1TM zKz}g!+h2$;{Jn<^axYSQzTtV!Xa{Z(G2Q_WF<9@$-QXoS%KgYplW~x}>%O?Wfkzh( z8hHNtE$Xt$qEh`wlhNsK%Il?ZF&ejVaNnucgkQ^`zfYnS{ZDD0G=Tm4paX64b_yaS) z%(Cs9Wl;WPenSOe_Cd#hyVHh(pDa@%9UD4yDhOON14wbu}yu{FlDm! zbdaiXG6bFJas)x>VWw!73b?sS0afiQBTf6?g)Gzljqum7ax$lzY;!+_jAeO)WY!>H zQ4I@(N%^z!$Yh{9*ue+tgOfi50Rrw-zaR^Y0j`+C-U1_wFNV#DC);|$F2s?EmyM68 zxeJaGrm1M9sGRS=_Hzzq^S
lLR2~1-E8{P^@{;qv(B&Vyl#`|{P1DdF z;4~fo)yr8%4BLD2MlLW9oi@HgTQjBrGno|D@|5Sa723J$2z`mrTC-oNv0mseY}{L}Gp*O)lym*dn#i-R zqeJUk{Ojvpu5Z!RPBYXTO|G40xc4CSm}xGj7|QPA4j13^8GEP{$|+a!I+i$ z2lyQRUdPA%iWYYI_sRE%^fYKQSDVlG@0H^(sBW+ox0WmUn))wvc?&!1uGZJx$mXOa z$4g_p)aB^%+G!dNf|md4_%DbKfs=pYPR6(9$}e=amXLZ&FfI$=fehSNZjePJSr>?wiZi%NRdB;9u%0V5Vz)#gn>TLDz{OskTp*AmU#g-WjC`Y;ewVvQdZBP=LxdH}2a8Wdq>ZG1*qK-EIr$J`sMI~=X~ z$8rC6(sk>jG}52_%4aD5{<_XwzV02Xt=cREkaA%tPCTDD`JUfpgRRJsc4^ zI~8p0EbP$$G+=VC?a@&ZGk%;gwvzo+^GOco+>rx)&@uDnw?I{d`LGS{;#)*o6PD>J zjH8F9Ma>OT7W)k`D=U<>=m;!uOV6*=vfEutl3iny*XrM~+d<=@URZ6C7N3`Ib-4~E zKfU^ykq7kSEYnOj$D;gmhcViQCCQ7E*}uN7HqRM~a|6k5ocZ|?y7hLA6k&XC=-9%j zoWt=sF+YJ31P{5T>Y02~YQ||}RZyw8X7~HU_wJbakL!kSCQL2U7Gg#QZr(ino&G_V z(rJ3z#PR8YxU;x{don;a4cv1hpQ2a;IpF?xSEFAN?ByCc|syCI#dFe$z1siDER z%74Oy8w4A+4Eyr3+i&?TjI zDH(q6&3qUuLrKM0PTK>qETGC*5O56cM(RC|MVGni+p)9(q z@6tvky6ec&&sSvD;aayY6>s^*BVJ1Eg`4UosU16FdGzCcw6BG0IRX=WA)9c9+=wU8t|ejejcdHi<9cYYcidn zwCXKqGS*t;JBIkxuJLp?4EDTslqj|zq2O$r>SP`h9Jrkzb(^g3o5WBs6XHOYmR$MC z-HccKpy0fEYO{RDglPl^gCmZI+%~PZ(nXr5rZy{G6bpw0M-q>{Wis`l*!feN(~^~& zl!@ZuNFs;)lgZ>qGgFQKk>cT$;7B5e{DW!gj;ds~_bRxfFD<36(kkuIpr)t`LzNE@ zB-JBmn=eRyAC_U2@6gMpH*v`bJe%@sW&D1kC?ox*ndJe^3((EVQ)cHy=cgE!r{rbi z55>ZcsicJwln%=ir-I-D_6iSZ%;U$CVwzBBf&6VtwL#Ne(&e6Vz_!DY-Z41>uVV>= zStd##l*2r!j$NBOg21{kv?&TqEx2y1GP|piuO7&{>dv~_l6Cd_ zEV-PMKH@DQ{>fe+=I9rvOgN9l+cwTz3(J}2nw_}`Z{%V%EQSWG^nyp8cpf{PSs*!5 zKqrvoAuT9?i2g}>?9_^L9ls@kCCf)(lBrx5wtIe>zDq16 zMcy#%p+_#c@Udn@rTv`HpH_~{bIv6dCZ+N=5EBS|#8P2oAK#KbyComIB}ALfc&w{D zmUL5-2c-+)eZTed`_?e>TXeJa%ncO=Yx)gKx)3r6TC4hG%ZN#J7(YO;`5$49{-pi; zhBDI(3nGqK^6X?Y&69K*$7oDi;ea%Q=FZJ)w2Tlfk^f_;tv4Kv|NK@`&2ik@s;xOx z^G*bkRtqfMOK(!@4ePEYr4G!F&XG?T#&4<_GzypD)%@$eAGg5&=|5?f;hgL0qAC+K z-5>kiKW+~3f7)vz3w0!?jfB2E^QAux+V()?gs8~;sK`Z8%Re6% zIRX!?FCDhko^DUFBTZz?tjP3RK6nwfifLi51~9m?sMgZxBFDYHH#$Mtp*O~t~7z@T)$rPHjOcnT|TwxV%Dn0k5u!*>iFNLk}J^Q7% z%$Q&}kVg5+&lO1qTHt2n#uY+xqbiHi^rigf1=TP0J>Me}65^Cy{;**?c+mxe5PTTt zs->_AE6hWVPntl#SD<73(Q#A`$l;Z*m4j!MH_(sDA3CMBuU{zzu&2I6{|=UoowU0J z-@&$BEMG4D^CH%g$sKFWEV*Nxd5#Vi!Iqg}->b#1+H_%gnzqh-S6}MNg0`r1=&bOe z^+{{aCwHu=0b*!Bn35f0mals*8(Q>r=4xH3YmqYls%e0xQ0FMpi6MilbdE7PF*^FF z&Jk~Pr0c{9{hDybV|2{ZiL-iL&~+ZV<9M)-BS~<;ba)a#tZG`{OJn^*uN*vvc~i(g zbk|xWIHGmynZ^;KGY!-@vWSzBzeBz3($DLaW?v!Yk#4L!bjP|=Ah7%$^NcW9Y^}3g&^aFTcO)4*QR@|Rp3V{D z?}#&!{eu8AggK~D`FV|O+STN zlVF_G=z$=ZmHG>DY_oq^aurN~3Il`(WW~hOU!wG~ntpo|ZJen?wl@{(o+z)enma3D zMQ`V|+GBUEQ?Jz@>$YCT@`Zb{7p@6m>uY#o>Fp-dM_f({9Eejk`WHWoZTk17QamDI zk>B|SFK+zv8{kTgi6)7%wH4f1FqVn0iHZVm@@5WtVJs_Y1sm62O!5=w8N_rgb`NuD zuB|W}JcC)f$WQrI2c}U(2i9y0d$WlSV1x*Ttnd=A&mQOi!ynFLr8f{u#zD^tXp0Hl@J0jP>p?6jTMxicq}ht!{Py8 zw}Max>&;Usf1^~yVE*7OL+*nzn4rcw)mQ=ZfEvlm*o!ijO1)5q62Mc*nm@`>St$}(E)~%^B9?#)j@e6pX2wt3P>lZ+*8lzJ_ z)Z|480glv%(Pi`aES2Z@TVLyyUyC{2$5LwD-`kqgyA&Hgn>rU2D5)5WDa$EvaD@7Z|qy4$+iY&m3&H`jy8)CH7oi}c|sD>pgcM)oB=P^sAYuWLg8 zjD5xnQI22taYW-CV%#ba^;h#jS3&t)hL)tQ9>rLIibaFs?xdJ?Ol+Ktk7RC)L_vJRdO4R8T0kpHA6 zl4`*%rTLo<<2+=Yc9|{)L^@U+HZcd!d8dZuC(zLoddc7Nf$(`1dRPvz1}JXG|AZ>4)%sLW>4`3@IjHu;E$gm8At2+{9R!PG zbdD+E;y|Mqs&hOUg#1RSi5g}R;-ooE=#GtD^-)&GG|@4Uj1?I1w(bzK^8;bOa<;v~ zz`kk+0|-Pu3tVF@?Ueo}Ws_%|S$f3-Dm2aut9+ecHB@Wme`q>BxodsU-1*6E>t_M9 zZY>S~FtIm$F!V2r5|ea}bPkKm`yQ~!za?0XUGt^-RSy<~5mZE$ltwSy%2Y}la7D@i z97+zsQSF6;eFr$|j8Nz@3B9BSwUd1LPx>X8A<>TC-H!4vhE|*B7N#p7LaQm8U9sw6?`~*$*o>hHjzUabDTnpa+)tQiGMN8&I+EC>*)HiQ_&Xz=^a&SSStdWrZ?h9HFTKsaLQdFK!Xeu-={P zb(+3QbyCoI;UeqRi3sl;AeiQ)G1PJ%X zgEFAazBLH86S$LTjWXiwG&6hu3@(7?>W7%2b-W?e;@TECXo6YF)zh}SOUcpKL0zfT z^&hzogR0UrDdQVt(HM4Z&6|1T1qQqcMt_v1q_RiOg5oSE-f@at@V#RK6OId0q_`%4 
zN)Y4x+A+)|?0|?Mwn?x<=%thXNZ`&oFYL#~%t1|uZ+X{odD%7R4s=*A&QvH%uCY(s z+-KwvSlj?rSd5ORy@gIX+*WNCHnD-A0h~Tcp;||{j`al@;3s0)uIgp=&gykg;(PuMm02%4kx^PyMu=|Gc zCveKx7ekQdzcFZ|R@y~=@Fh9U7jUX#P~L?sT$#vQk=toqp|pFd%dM*epPd%ne;Bt6pLTnggy>jvo$-&E)l_~uV}taFH& zowy>kHOhHkvbS2C>5}y#d&gH_9p-dmZ9LZ9E}H&k@Mez$Y;sk#&%P8jR90lpA?B)}GHsoXnM@1Jj!;WpkR@-!e!({H zuohN94Ijp7E%SaC-WS?@T8!hTn`?_Tk`e6zyCulu8UW`)GN$>yu&G)*isLZNFQR0V5G;hO$2`^X$>0R9A!C#hQC=~MBG^*$;}a%MNSPV$g;nrRl!4{( zIWvdk%zRKyW67~z2*~IVBzuE(lK`@<2YZ9bat>5b42KtLvH1lp>%jS$4hS3ftT0^L#=jEzHI8H0cP$xj#LJ@)6-cn zq(X1=bj}(gpFuBJa|(vMb2PBBAinez?t8r^J=lO2T*JHs^dT22sm3RazSKz09 zVLzIB_M*}caQ~o&JA$FqDixUIUpFYUWu~;gy)!Xw@+Uf#cVnNz!5*YxOQmJ`V_X81 zpN30*hP*53XHC0&3a5bD*cE7B#L5gch{txdAXM9;}A+`}T+~2=vHtpzra|C-C zokBlydqdB<2|Pvsg|aptih9acKv#h>@;ce)pelGS3MoFLc}Z}9M!@?Yc)e+Uyq8|N z?UA8B!5M6@Y{dWpd8X+`*@a+nmMIUWfV^aw@{Y>M%NL_9Vn6@FN|KL1j(UYHH1UlH!p7<33Ij6ez@33CrC__!A6h5u_i75>FY-=rK6KXnpM|4O zF4J7>^o}1ZH#j$8_3W<-+_+-m#5Q$oF^;duRoKM|uSf^@hwb4XxmxwTOCJlX;UBry zN0STf)?5BL$GWVA{-qEAC;2-@kHqM;c`W-g2KVBaf3O3er{u~GrMba!JY!8@6BMYs zGw#Apm@uDxM6s)-y(rL;HUZey;;~%Jc##;tr!Zylu_;v@g}TmTcdVaTav+{qOlIt+ zNc(w4Sw=?q$H=>QTKR4G0YDQ7Rt(SlK7~J}?C4OwXmErtB?*m$konSwY60&e13QJN zsxpZE_D~+_UlcV(-maECMR|vp%_`uRP9kY&mCiAk;GZJNJWATBCLJTattKt@W@Wp{ zznZMmniav%aooR3U#JtpcNf)wW;JiBj1Co=#hqi1*hUL?8*Rgd z@6X#FQgxAR%@;cnTe#t$edSf{5R}OZr2G+BmTb_r6D*q#<^oZUW5yn>cOR@eRe$2H zb(x>=(Rtf)!Ilx%?L2t%l=~%+DWM%KY*|gFLS5R^IiSow#(A@yJg{PZieJ;5u#5j~C80bwDE|k(Bvb{Dpivg z=*XeW*I{CP?kTI6Znc%^ntN&tJ%wZz6*j}V4QSD zE&k6Pju2kC^is$AN{{r<2F&#X-yOwenC^_R-%$b5st!_~<#n<&UB-=0_E5X>Q2Y90 zya;7^hv^75AQ)S`J0Z@br%^cb4%$ayc(f7ErQnTsow`aOY;i%9WOKb&V;rOJ`cj z64_PGzb%C9zCtS{>e0~^S|w5MUMg+xz%JJ8Tkiz@Xx(UlM=+!OZeKXCFWk9ATw)Mc zY6bJbd3iaGetpYyQYR3!Oa}qB5PT^bwx42i55r9U5{R5{qg!;F<8ra2IB$t7dwH2k z!4^3fn-++qO+R_}W--A+Sd3j9<|ocFh!dcUDTFvP#Y}^^LQ8iRg??Z}L#Gcch@|IU z09R^owZ3ei)J4040}T}hInU$7QUhjcQAe3Uy67nmhtDQIK?pZRo{J9z{csOf=1`DL znT64oUhhEuPpN?@vy94&V?Q)t{uLmRluxlAG$8pH=?^@YJe@wPrG@mdNK;XU{7DNI zpO3v|C=VFe37`{uRh`c?EL65M&VYT-7`+qPwQQ^&8zYPj{iB9Qpkg9GL)EvBlcr(g zBRvGBQ5FsIXoLJPE}5VVHrgQd=l4yLd-g*B_1iWiyx7LVzzj-Xw-wsGgh3~7!R$po z9}Mbup+=r!Dx8&5a%1z>8o|7!%2cKQa3}V_RyuN)Z=M|j6<{_CqA__El$?bjkicNG zUZvE3uWEW5ohed+fi2ZTKeR{>vXsP1^~>n9OTU&r`>^rHS=32=xfTU}s)ttW4R?e4 z1NRp9ilbWhSUk`9X6_KKPNQ=s$| zOC1FNs1pYS83bs_PYJu^D@I5!fgU_qn&fl%LKJ^wBfL;-2Bisd z9C9<8f%Vg?tRne^uvGh~6z6bpIL!wGC{f-_M?c7#rl%FC_l{_O;td?2kIqG}*LZQQ zufd&!h)N7UFLzH{kE~be)y}sb6prrf-Hx9=URZ!jp8=PYll1dBVO2eqpyKZ>OnIrc z6-#JME30mmKT-K+&ipG!5H7!mA7u~yyx+>lQ##lx^ut?%zBvn_&d@gLgA0(to7vDd z<$c(%Wba&)8+#8<-XC};s0~x=d2hQ==ktJ) z{n^*f2Y$By8gOs_1;C$dldhs(PD9_o(Vg@}n=G|K&;khVk}OVF2LW*WD*0`~7#fS) zv>&;R{YbER*NE8o@&Gri>~U3%+9z>Jg!=Sj0YKweDB zE&}W(L})Fz-dgI~B^AL?zmlp^Zt9&V55#VqhDbY3i4oOqr~q znJT4eB4RddwNgdW!x|wI^z3KQJce+)$2l7n4wm9HBu!P}MIG#%9xt%)f}7E) zay;)t`9ZZ06Ylk)vq=8YSen^im~c3N`g|}hqGiTPOe3{Y@3jKKWC;Fs>R^2+zxcgU z;-RY07Qv8cAB-0`O$keKQKvH_uS4vooffd`ZRw1Dfmv$O?J`#wLF7OOjml|ki=JLt z;96PW+<=OnV!zT5D`_a6hz$VH__U|Rq#hhU$zJ$g(BErMsXY|z#4KLA(7$+}@+ClH z4IaS7QK$?SHmKK`)gzd_a=jFLXqjCWfop><#>n-Q;Z#osiLKxnlwgm1Qt!k#LXQV3 zOa@Ms_?Tm$nFZ%iRKr<>N7>;4wuVdPd>9FSF_ANMwa}P5(B_`U#?|QTman7h zG;4KIwXqx*P%8YY^>g&{sKz1Y--VB22aVO{vSj%$x@Be|V(ehs#J%qusKJ7D>RoBS z5q!ix<6YxrQ;PutbXUy}vEU>=kS6r-5Uk30PrkPoZm)a~=x>P^*x$N^VWTs_JO@i3 z7|T;eukiI_p*_~jk<+NG2mh2X*oIVIP@9c~$=Vgu5bhUnz$#%bo#5G}iI9D=V#?lh*%(%5Hd zOJ@OPz_aB9_r|FcF4I*83zc&cbDhDxI)a5dGg3y(+o+X? 
zfQ*2jLmJ$2E{)JsMrdmzbjs{zFYe0yo676CoAZYL8t$G_imPN!_B>uml8p$W_MneQ z3@OiPhRAX*Hnq|`xTN{ydYoJfnz^Kx8S4z?;PZiQILu!x1Qg+9#oJd1qjeL|sAW>H zS~l}g5V^r_3^1b1G!(2%M!{@SGn-De+-VkvL0SdrOb!?fdAJd?2fde@V}Z-hR;KEXd?DZ0q^Jt}$x9iqab{Qcgz zF>-r#pX{f!D{9+vYHKzcRvKd=F9ob8&`-4`qvR*M)%Gwa#x`$Q;RLR%?49UHCvP&A z5994yh~}rBnscri=O(g$)4{mB>-o`vlM8jf>mM`Tbz}PO8&X1Z+Vz=PfsB+?po@a} zg6lqYV4iRrQyVc)4kY?R<$xE*efU?}*^i#e=-AR|RvR6jA03sa#ZAdP|C$S;SZjRq^%~-MqH!lELBr_w*?MCO0oamtZ=-|nPhTj!|%TLJ) znwbTvigCRU-tR#8!R6fcTnMU-4$9Nk)Q8qw4XuGNbeEwt>!JLt2XDpHd=h&4!JO#e zrl{=v&}>&|_P-6shV0u`niZL!H8A_-(44Fo6%IMF8AF@aj6X3mR7?pH!+Q%l`O)S< zI0Vs2lQ6J2cD&8yL_obekQ2QVZTe;cCl*ACF+PN?@+89jpTjA)c{-v)bD~3<@aE=G zZPl2gW1Geu=?DGh+N==b49uO;VS67iRA%84ZR4*D^;uy;IG7*QI!7%lRX+!&iN`-y zGks3ME@Op(J=~1L53DGsY2P>Qrv5?*2G*ZuD!YC;Ri73DjrjoU%YqpC#0uAL7=W?v z=mp(o>llA=Vy{~01^m(Mc+#|F7S#^63Z2;G5sc2HZw6`waA7!Tx#n>{=1wY{HAxEz z_mWw{lWBUfe;){=qoK-JgvAItp41})t`@((q(Fc=e0eSaZorVRsTwzN8aD8{2%3UZ zgE!D)-$M9VAPmGiT~6Z?q=k^=oM+V@q`0Jxqxw-AJXPB)2QpUwC`?@6=@uS0+3FO)LmVYD-56XF~*X{Njbu^)EVRy_~bsCOULC;z^%%MpbHW|4u(Pf>&)5L zgRb6)q=~*UNNK(dx)ITg16zL9D;yKT%M*)IF#o^gaj(w7tg`x4jau6h*f+; zO+E4&T3lkIV~64VAQvYH+vxlt_E7A(*i*6B?OB2LtYCXqkUcBJo)x-LzcFNE*2ZTz zzD3w0@**r_V5N^O6gP z7Cn=b)fY2u1PM}ylW|}1aEq7Ufb=w?ETm9h6oL+P?J6jqYdLML7R1puaqbky62paI zZExfjf!VUoGwqwO?+_>qY>eETP#Iaf8Eb1yh{iu|XF-|CFE zmPUuUuhbGLf;cWOI{S@WTt_x(H9W|=sjKXiHL2_9EbFCP`)2JsRf&WF>%aCiMVAe8 z4CpV+6~=T#hj&H?=WW~g%&qGkS$(N~Q!CopUszHfZR>~*xw)rWpwL^-e0#kKp1g{qhXjK&GY4l3VcCxs6dMX>2?qC)v}Z5{ zXAQWTHY+Vl#bKo{*3g7aHIs2+9MqZTb%a1CU(vLpJE6jY4{%ey`&IcNx$CRatU&oT ze)(n?;nK~odSo%u$X3{3F@UV>x;MXZ96ef>7R=r-l+&~!JO|J5n{MyE40NU2Q)nMz z#tDV;*H%Z$1CDUmm&SootROuJzZs{#-sO%q8 z`p{yphQ7yH>YjE9e$USozz%a5EBvq`(n|&xdHX0k<&P+Z3tu9h@&J6;O)dtZFxIze z&26p}{#P^9!d>N&yNeR=h-*SwE(;~)8l+J|nh3Zu2B-k<1+Hd znSD@USkv_VfxF+2gepP^uN!v((Kn<$FAZ(R17AP5m*%^&g;j?!-(!XF>2nis?*_(j z5=IA3MhONfh?lS@vckXsJ)U1c3A{~fx?H6RawKn}I-b(tCMmkdFAZ(_t9i|>$Q_ZJ z^SxuJJV>#qA8XDx*kW;5uvjUL)39y>6TWV|Cl5e)Ihb9xoqo!ig-y!BCQO=4UIEri zcMhPlz^?USgT@hzM}!pekQ~bZMHrY0M1d_@wy-HiJI{{iY6)zrn;%-0XdndAD}Qfl zq0K*uZB%txZ@I8pcCTE?-+bpg3 zxg%*qN3euMCL08udLcvgr~KScQyr}@ZK{UIN^)_;=NHI|!AY>UntJxLhRHL{KzM^N zB_cxF#>dHbmx53MZADDHuvRYNSm2AJPOmNPh3<VyJ)kyqOXo!Myi7y#BGMj>D|_ z$3712bF~eqo1W+~ux`A*hvzxG_@So5eBA2I2R;(55#*w#79-M=rcEJr2}zt*llSm;+Ff8^h)=@fw(R_7? 
z&Nlp4xDb+3d-vyKpa)_MI6Y`9ZOtrl|I?VV!@g7Dqz8(nXkQ%r)qQdO{W|>mb{GbA zkQQfu>BZ9{Gj)pRG=W9+$9F;P#5K5wZjb*HEcTv(6{r$CqaxpKg??j+M)|8|y!XnJ z+Ao0M%`@uYpbWxkgp{1|wiP~`{Kma__*ctY@A-N9)$&`u^xSVliTCmkmbd&O{a}=M zFMf>t`n~w#S}}N>XrCY^rin9uE2e~DhvHZPLIVzl_Q2$jHX?VzD-%_%L5b={!-(2R zm$B0EB(&!u2eyN%1DnGES4rnfar){$SSki5i}uN4;xsX3mN;{+m}wGM1kr$^^kP!J z{|AQ*LvoelFRzAmw^DZ@aE&jP?yFFUa>?uPrbmc+AD}|da@0nmkH>DRbUh7Sv$0z% zT`T18J!5xNx}IhD?yq$H89F^j;e91HRVx?;h!?&x_^&;-Z_{Mvj zj{SJXxk?x<*b`z4u(X1=PM{+f)ZvKk*ZdlY;9bv}o1nM+Zd0~BAu@8fcL~N4L3;ww zyFZ;Noq?~!zhdx+gLY=uI_H7-*nvu5|64qzGjzfZY1tXPSU1D4{X*|T?Vu$A%CaWs z#j>a)7jZ^{%OBq4SmudmRc^LnS7|+mW0eRzZ5~;bOZ49YS*`_4^bB3f3{%PsI5d^X z+S25J(&S#1$;S9_h>c5U_{Yad^flvvqY+;rK3$@3Z}%yCa*#baEG4;rN-{nCU{4OT zCx@ma_e)8RD0Mwi8Kj{Mu7#CBI!2Dn${>T>07q73kO|V|`0&agfB8#Kd|YLancb+fX>P^Y6?JQ(YUB(~s;Fm262bXS!&A_=xB z@El>}!Q6e?-B51*14k?B)6&}Mk{j@9&?m9zZ#68Ho$!Xy7|`59I@l_4MZOHL>YoE) zv5Tqxg&Jt4ed>v{^B_&S{3#V%s7-JUbOeX2d1d#$z}@>J z_h}>2397?LcyfPG>AOc<<$<(GLA}}eK?$1PYi?%eBxL6WMMei^$M&Y;C=U4Gf0o`k zz~e?tI7^FuCj><%AH9@P&$?btc0~@)wnz5lF|$R!criFeOk`q;U0m@iF+U>W)X3nz z5noRZ4vx6FA-ETwd<^!F*uOpaXZst}3G|`5*^4LzUpYtp(or;pOCC=oj)i@aVGbu2 zt8U1pWxYl;hY*5uZY^5n)~G=Gr?TET3z_bAH0kkv8wyO$aL0Zvv7%J2{N7n<4~~|5 z+TjfssuD`S+^{tuq4b6n=tpV{5&O1m^@mrs`av!NB{zr~(U~aYXKAZ-N*_NpEoH6N zu@HEX*gi{@ZB%Fx*5xVFLcbbsG960Ld{DA`Pas+8*%4W-E7yWW@WFy?D}ptcyD%Im zL5(&c`}x2pvI3uWL?R}0rkB@H^(K@Xo)|6ky%X$#k&&;eZKG;~H3-D;@<6_>q5e+N z46oAi`gd;&bh%!I!64MKLk3>~rFbtWP@wRxg=p6Io;A12w7d5t?A{SrP0GIaSI1=w zXq$A&+qT=9PfKcs1@>gP{EG};Sa4bfwGTZg+W%4Bd6ZYJOMvAKEz;hDN4XCkM5=0( zJ~*JzTMhl?H8;jjNEp8(5b?^rf^gH13gzKQsP;Fl@qoV#`Op;@phru_PY7IdJz;o4 z0xzK5n+cFW#-&1qf^n8c1pp7${4jlX!gLHkG^Pdo&Qh!o+s=XlmjIc8ttGfJz)JZv zq4e%fQr1w?&Ox+w8(SRgM%UbE4sjSr@y;TpA{<=iUClVNs1F>O&qZ%nbAvYI`w4bc zWwNaxXbZwqFu(T%37d9T)~xB=9S7>K&SU6oopBBH@&cgqX43Cga+=BRr*Z;oE%OeGV&k<8dPduftS)VJ@iW?qgVGD{=3cadng6X z2{U{2D=^oY-m?MlTW*{cNdJL4uHshD2+%R)l|Q3Hgp+DmPx&O(+R$39Uvr1`$#~J;dTke`(bi$B)N0jblD6pUHn1dq?6o%R5d4(a+p6(CpR}co z_OvIxbxC^wlk`3>>z>|>{EP-#E&aj&!{$h1BR&=u*oghXnl7yMDJ8DA6Y!om4$>sD*0iJCS*J=gkU6MA#p&kUn3yy<-=q4KR7bw79#{!s#6{EPVT_X7~;O$ z<%iBW!h7*lHsxp|g|2aKE?F(86$}> z?pR@n7%T;Irv5tKNyEF|6$WXIx5zl`y{LK0G|Dia9ixv$);97Dyyl$}bUE5cRA%>Z zDFJ9H4bk|h`q-qm8_|c!aI>N0 zNiQ(y3Bu^(r^K@*q zn#972ybigLN60_|GvkF#k$D#AXs9va-jvGgNTTp+$H>%6=!t&xS&huRVvAhR6ATk6 zJWjFkAQ^o_8N!f~&wB||bDlysBUIi-8c%-Zp8pXprcgv&4|DH8-oeu828_)024Nnq zYan1gaM?K<4jT)h!gng@N$oTb z$SV1PK7hU#W71(TBJq)f@q@mj0CYUuzWW$F$siS{fPLLGUu_sNUwfSSzG4+CM|{+Y zNpKVCuv11V_}Nf3zj6WBkH8?H_~ve8+m_&ZJJ3}csnkNVke%&Dx1XW2OZt({koR;q z6ycd1HuwIXYaCD2oL-M9?oihnGWoOSl2ssvW7b-gf_AE;n z?tR!p;QyXF6^{U-mz7VuA#J6CmELEeCP|ay4%8Zz6FuWop}V>#NXcxJ$D+y9HXS?M z4Mi=wPIm}!B5QV|S8oMi-u1}I5c8lB|7idz($U+#o2MJZxFH~mpiF+>!h%n=Ng2~v z>Ut7dW|U%9Lb5Smc8aAEyxq*WfJnX2)bVn(QshxKF>@aQwpx+KrQ;ig4V@Kg; z+&xDA7;eeX?);KSq4IvWNec>&O(ap-A}?N}g&%b9+KSJ+4dcwa(y0^Wt~0o$a8_7B z+bjrQcW=v)p1Oc0x+34h!~qXW|LQ#8>3=!{G@hv{c%pC75|t6={fr}|?gnQrb+I&{ zaXnz5(T!;2C?DvXUwyOit|~+`SS4(%p__|l24#_o=r;|1SvKSutSlqEo`Rb&_^f8l z^cl=r8b)T3l^2WjBdNU*o^lNP{>;I2Ac94Dj0jc-umsfSGJS4KOKAStB+N}DkQ@02 zMu*{hMLDbL{ZSe0;+bY#@RRlet>rCLokf02Ohv0aF5Ei)<{+X|AC*K(AWg4z3 z_0vM1`SNejaq{0iva9W`)($JiWGg;Wm>_1@TrUX|XRJ$z#? 
zyyVd`=%4Kiv?=O)w*aZi$Vp;|ZYliY3W33*+2g?{Q&EEgZxgw5+F z;-)wf`*HOAuxePrJ;(?VUKJcqK_X4DJcYUNq-{ax+b6Vl6kkgjtYMg=MV zCaDd&3h8KG@&QPxbg3q20&(NkD^N}rTgznMb!Q(Qi zF)}yM(N8BL9nlfurd1mF1n8mimo_dse(4@<#13YRys5DnR#Sj68ylgwoP>VfNcxk= ztP&zlXK;mJ9A=;bnx%s>eLt_lFppDm+T_1q@$uM5w60CwaHU5%iKk)DT^VOC-hrO8 zK%kC~4azqaI`&cfjmrlwrv`%6SRO;7%T;pU~^TjyzUBd-_k1nkyis2 zkh0rIg3mKN5Ur;`$kwUl3a$Gf{ZoZyIP2vhH$d3b_WVy9)=e9G`U)*LfU|Vl1?9~) zN+cmIg`q5bq-VUaI!}oqI!82F+1A4@3g5!s=5!K!+Z;zQBzbZ&M|Vaajt|Yl4%)@w z1kpZ3Oq?U8JTA_Bn8q5mFGPEinDT3J=C8OvIzY5Xiz#?{Mnwif3h96Nc1n7Re-w#- zWYs4xQsWk@A2m+OP(& zp~v-WaMEnc>tJ}@_Xcy59!07U&lSVZ*5 z;J}F4lY=c0i#G(DB8s*I<9YxYY@%^c^M0LE+w#BU<>g+=slD*O#D$UgFoeX1y~SEf zd_NQ&AA7Ipdu3nAsa5`${JiX|Ikn&XFL7ZJStElVj(B!*@X(074Z-mQ^Pq_1+k@>9 zXZHpVh`4klxL?HG#^4aKHZcBy^TC$*hkio!!$$VBjQ5w?_V9?|lY_%ROVA5*x1bkx zY)3B~*!zpLK6tMDH#{I(*F7+@tYhDL+^a}f`ZHoneHb!L=Q~28c7xhLx3EW3lfwdP zbsJ%L;!C}@qo#u%Cg~`Fx?q0XN52txp2wliU(OxGedlo>$ETeq!G7po5?((`()^dc zt}+T?ztGKdb)c8Rd}GWi*{ln+yhiyr_e8I9J=C{aj+~;niG`_7I^l)|@&9<~othW)U^ch`7#8=<&(cG81^?PZcR-8-YGCQ{m0ainI-A5{ zu#W~CvVie>u+Yu|F(kl&yYPb{`{SV(h_Oan#9$E20IvqOZ^S5krXXeph$~ED&OPKn zcuZnuJ( zx;9t})rDCZjPkG^Mm0IP_MP%`{R3|rX^5I;pfAlmodm!#xvg+1bXo(-`n#*#TdQK- z^P5Hs!pV3pn)TxEL63C@=>**ISmTU%PhlUxlz&ay2V`*il#JUn7rpwM(y!ZM%ssYn0N*(B zb0!XB1Q?Ql`v`8rWyK0o{J8*z8#gzc^T5m)*d=xE?L8MZ(6Cb*=n08IV)PJknAI_X z8%?b?G#rO&ln!TTlur2sypsGbwfTKY zj4~a;p}o`g< zwJ{W>;xA+1Yvl!Aeh?GSCQkr5qQ!*604#+*HXtr#p*-l5-!*{;?NWZ%v<%bTtuLO3 z>vaLO(2Rh6keagG-GyWmcXMh%@srZ?5S=K`(=ZLVtcRH^+@Y#u!@m=Z$mW1#Rp_0M zd;^3&oA2gkU(SUw^|l327gnCZT)-6#P@A57d*p=gKqXc;R+!lOMj_ZeTrf{pk4XqF z_E(LZ>k)1WFS}B6D-6+r>n~VN?#rzy&&2_mD-A%hWu!Q}jDQ@{ z1Edn`1rjFcgg&Ln_W&*#{VjKMcP=d1U;RCRl$X$iT}nXqxc+ zxJJ@Zcem|WuBGKR4fzM2!#Q-MrbT`9grUQPVk$aFc9y<1PH%6 z3j0i@?rzuLb8FpqYyL^ASN2N<2OHV%p@=R0{%5x74l3c_fwYDS0|=^?l9OKsHtO&` zIfi~N9XeR#IS}Rx0|=~U^!>v^@24O1(m~#?+1_>~-q$YnIx;J-{r_win~iqKv#DKl zKbYF31oC!;{=c_No=(?6ru*78q||j#>P79E{on19f6Lp&e*1s63%HKwr;wWq1s9c! zbkI$?)r|@Nh;)rEusud%vafMPZkDAU&wv9%!Udu2c<#ylxpcOlUQj$yo=oUJnWH}% z0_&{xk^y)n{QYWH;7|I#mB6k4o;Y&~ieFbI&;*#sCjfF{+MJR#-vRRv-sPCzYx)rC zee7H01saTHmS-&;Gd@XD%>M3?{65^SmDWWez(P#c{zVIEu?@jBiiNcNfuLA6Ct9uXfJ@MG+#4ad{ld$JWczGU~Y@rxzZsB>e#SA?Vh^Z1K>qu;rHD zX|2%72|js_a)4wk)exM3!0#g}f1nYmq_l z97exz!y-xw)RhgbN z0#qth($e*QqZCOKAd=s#7rIr0!24LsMFA{;gkD<;AqGCHK=XTRU@1(`dTU66P}B<4 z`d4FxQQF;T%`qsEbgy!gZ2wBKp)$qB@$g=OO&4+Mte0UKJ`P3TlLT%9s*ifS+hn2w z<=0w)W)JxK2SBHA*yRbB+0K`Rc>tInl|x!I3k&|U_he4!*9Se$i&(`U@i_ORRXPpr zq4|bqo@C%2>JJjoD1SuXfu9%(3;9La`KaeEY} zvf7HJB3h&Pg2!DIo(q=Tu5i>p42R19t61vURo6@;j^l0AQB;V z6B2>XALl8129q6q(Fw1T2$wufZ8KX156f?j3CCVS$6oMMVUr?1Pn?Gjzd!2cWA_nP zNB4~#__BQnAY*^%KH6bn?|_!~iyVc07tGKod)4T0$kf#600@x7BexciF976n802u? 
zHJFdn#@3iP0z%r_Nz>TidD-dyMq3H`pUT!Yx5|m-mcXHtY6C_xW*ZcAx)_hiNc*@4M$ws=x9vPR=)Aoo_L-ZR$h6}b`+r*FIgR}oKPY_#$4WuH6Ac$loWRU75ZYuHPY z3ommWR=EBb9^w@GwixPQt^v#Zb&tdBPf9BrO0pf1w#OWgo*ROTk80m@+5>ZPW^0mB z44+SJ8zF8@q_zza=Zz4TCW@;b662)N^P6FC;OFrc zm?;1iQFG|!=UbPVckNg2u+yzrWdY$$O=m99}!RN*n7^H?H}gVb8j09O`^>3r`XV9)gWkV;QHu!eIwi{+F&o!|SfF=HwR%C(i@|w?E~n z)h?j2z@`}mE1y{gK(s->rYI_)HJo!K~D`52z>h|T=)|%DSAc#r<4-_J# zj?M~$^o0SJTBzICYpM+}sf5avw$A5y*@iklW%IS0Q9tz**@u-~%M04zt=V7cr*Zz7 zs;D!`EuB!*yaj^;T!Gy?1JtnxT_v@O-)Q__pXL6iBLIa+=<}qpb&@CM#y`xp&|V(;W*R)U{)VLP<_=1`_@{6Z_yb3pNzlI&p|>&~m@JAR3u(u=;7|BavGuppsnLx>=JyCnn)S=({2wkL>s zj|`2k>6YGaU0}vbX6O+;tes>05G8MF#f>G12&HLPmzlSRs)J@?xHd$Af9_mhZjPfI z(i-G;(?IK$GswIs)SvG3APqQRy6>;fJ?SrAI)#FyR8DU>t3>VK#pEI{;h{g_fssu@ ztza@<@{CUkoNR{78e)zJ<)cncu;3QhN1bbKZ7xXIJSlLC=?|j33`!63Qf*XDCF zp^eBDiGWyu8mgE7$oY76DI|cH1xq18BSl@sicaJGGnxS%X7A|z_n07?o{`yeYV+a% z2w#?a@cvo;r#Gy}{7XN*Ze8kM`Dv%s;$QpeHR~?F`cJP~r{NlV@uy-~xXAhnZ=DYd z7GLip{2N#`x_U6n#K6NgbP2;p_TpkmFkuC3Qnmm8Hu z*PL&2@FQ47=On$x8Ri&4<4nD~tJAj}U{{UVHU2~{a(sIQR9ls&{j8lhIr=}_$+ZhC zwXS^ViUQ#V#}~`}$ni#eG2@P%2DcUiF- zn5(qP^BRrc_@*FN8zpLMGqgRLd-CLaCr=tqKANkdK6|f|GZesw2===IPT8abe1Y|X zNfQL}qU!&Ly*H1G>dN-TPmMs;L77CQN>db`QbE&*Ce8_yqM~WVS=(+Qo-%5u+i21z zN^TGngG%3vW#XKEWyOPlk~U~}RHPd!wxU9!6%zqP4e=OR{t8%8PK_D6qa^3k8SDH4y=5j60y2t z`IJH#1%gsWK3y6P=fn|7{73w|+>t2ScZ75HT^NvK+H4WwwmG4;#X+`}l;X??KihWg ziU?KJc2u$<^HH&0=+>02QJDsFhoVxpp7QM}c-nW6V0t34(g`He;Y~-Mw}mF!ruMWw zA8uP7YKxkQ5`{i%+cXL#3LVNYCnO$^(P80-Wg=T73ZXcaMttvTQ0{qWEF^&VY~Alb z-T_sf=i~d{6=d%_XjZ}`p~oHN?FEdz;q!1xjTIa5aYKw|SiC8j7HruT97$uvM)_Nf z5d~Ob2Ay`Ep&or7yRF|`R>pd%e{(f%Zq}>Vj?<$^vqC??UEJr3wbHFdW$4sO*BXme z@durF+FEXWRP{W3Cb5gTYUyj3s@^PCiEs}KkABSana%M7I_<5|-eG8O!f$M+g%2(f zUelcVW&1MDi&H{~w=FR!3%8jst0JYJ2+PjV0d!{>hfiI?`q3Z{1XB~Q#^4l74-KN@ z;h3*#C5@UlDZau$g*50r#`k|qOM`{9G{~T(!93f6IhZ%2Y@rrgR4-dngl%ef+w)`qnJSfx($>p3{F;A>Rj3Ab2QE?||3@z|^hGGF z;XYuDN||4lc%; z(#_vAc!i^xkjcwqg<0xA5RM}lQoN_oA_~;}!ITIi8vMQU1tEA%U^|c$uoU+&G{&FK z+e1OxT_8`pqy)H$)Y#Z4rnE}`@TTo^ljha0JRq((%@3hz7KG=UBo}H3T!ER6T z->1e(?q}6TP!dwS~>xncf2Q(~4pwmAu1Rlw8n3UAsZ^ktj~ ztEINfLnE|=zgSeG?R@<_Wop-ANN>{>E8l7^CP%bnf&FuFrZ-Iq40I<-G+v@2%Uzg;CjyiTKwP9eg*XM$h_PTQYMb3Z!yRv5C6S%FkE&iJ zt*)0sYd{U}Rs(9Dx+-U@#@}~{2 zG#^sLB=Qrk5YhA?h5wEi<&dAM3?RWHk}v`yAf%*4fY@iAAosZ>248`NvDvY49Cxzh z32?&IG-%PVjlY5o&j@k+6<-|-gU{dgNyAV}Gp=MiE8h<3<#4ua3F+=~I`@S9res&% zMk z@lqagvuzOax{!4ZVHVS5#pdnLf2hfg5ZSE}FAmC!F+_>WYcQ;eZN##$h*czD!LjPc zVTBhER$5A7rB;yh<=^|l<|Yc#mxM2_FU4O2mBmFh=4jFGZ4;6e0A$44L^=v&0Bkha z?|gAtrBHfpzzIbC`b+tnFVPXMe<|Po(mP*NrJ++!M~q^P$b@bnJh_P}%uOaQcBfN~ zzFw`0IV&IHTpPL6ijs}0C1*=c2W6E;l}u4d(Q?T~jccR2?Z~R z-0G|u(FbCL!AeDIBw?4YKcbtAj4dsfkFvZ2zD9D_(5)FJ>p?7K!ZEzG38ej(n#^W? zm@}uGvFsr~LMe6*;^E7=4L~H^(1>H>BHi)~Uk_QBe~tcLK{vt+WTVG-7;9>AmsGmg zjH@;ukK*}Jfs*07NroAVby#n5MNy*PcP@AMeS90}K_wse!v^{XX6gY` zGfX27@Mh^77&gEle%*dTk5 z6LP^S$LlYA0)P1v#b2m~Ke;IWHn4SCyC&k`c(9kXkp@^A%N4x#>B9OpaS5csOABgI zW%JwRU7-AN7wf^%1KuYM1Y1jgyqvpO{=+#e1vw5gIsf@ghic!;_7c^;KD+^Dh*B-_ zubPJLcozIA5Cf_apKFzV-GJ6*LhlUHz*-1_y6dF{W7N)_AXL@@np{;Y?xQB9dY3! zzj?0_Hs|div`_Mz|aef$-?!l-q5u&=i^BS*T_WpG*@hGAdJ7brtQ7WxB*;j0Czt>e0 z68~kwmHOv%iwnRmUa6M4d^MI9HTe<2Th$Lj_fk`Pnf{A)?tylvJS;I(JJfhI(+i+!D%xp8DVT8es-&$JhuVU3=Q|>Mt3bDaDauf z`ym**nn`{hjVDb$1HKz2jJTn$P%XbzSEvIyY-c3mOu;#s>Pd>AaOza|-k`B>@8zu|nsSxF}S zGU>*3;L}Ee%wPx_pS%&I-){CT{+I*8dF{g5&y;vVRV_b`Z%}p@@E1BFoQcBSwWS)v+th=|Mi}R)?>!t>yNy@N$m{5wfvgr|FSb)SI_V=ok@R)uKQ~{uoyFWrejCfjERVV(nmH@TC?YtBGp)$}mLXN8gW7pPt z=?Ft_iTZ#vs7^Xr8@}D!c2eb6D57__MCFZ;|6GR(-U}QMKByBctxCCVapAVK9=5~? 
zo7HR!>CHosoh4}v4LOhq2dVPYK>6U4_26^va~$jCu(c1o^sorw3gAfrxMnXIE})f- z^bXY}U)`f)T&DDl@D~H&k6VYpxpseX;N#pm;uY4wQbk8Q(|FGF@=>~-GqPGEjK5U! zalHG_Sp^zboxgKmWd2R;UEaV*;M_O0P?I|NqfJ-Y*e6f<3NnJyzRK-%<%N4%rK(n$ zU&{~c8g$TEtja<7b%xZUkG9`U>EETKVt)B3%nm-7l8=Y+ zW@$WyZNVL~&qo(;|#|sHDRd%vT?R6WyP;BBNLr9KS7A-tI^j3&@6t#%N%9 zi>2Qt?0in##*Nq06awke9^Ac~6JPww3ok-e-=^ggX0Bgouqg*yDMW38zEA!Ymhq9Z z2k`?Nc5|E~xXh1R6U-eCEx7WP<9 z$On*RRM~;ssLKaFIbKAfqdk)7Ir&F}t4?by!t;QpQa%ucl0G{*&@kG_@E-d`JRnPs zpPoGOagoPZ+_y|+sxlrue?(QrSF7tG?E#^cuxj~@+pim(mN0b8iMMg!0Je|YY_Y{R zRx{c8OI?w|l-vC^g}yZh<)K>RalR&A{EF(7=RJ^-bb)G>jmwtbV|qbT?5GAB83ky> zj=H^?aYaF!fNjpUYJLuQq@XQ^ZELl~cLN4>Rjkf})U02h7Yvuj;t@>-lhTp4eK){> zLD!d#H2Nw-Z>nGUsSdMiHz(LmDQz5Ut*~^+QdaWR&0(tiu)d(&8RThK zO@oKN^SvGy5KWn_@m1v@$KiNZoO(6>%zQ2|-s${MNruwFdzJg=%kz}Pe0GrjeEqCg zof&5K@03O~r6EjF(m4K;G_ZIUfBEBN+F^PhVun2{F3R#%yw%;5WsUNc-O>=R*x0^# zyh?sdY=jz2mN>HV1DZ{}`XY%q)(BItI`(aLM4WkbkK|tv zBF}*PB5kVGWM8=HYu{OLwgX&=vk^9g@9uD@Vys&VG=B_uO>0`QVpHQle};E_wDFj8 zpC><}H?HEL$4m1 zQu9(ZxHtZ-o4N}^a!WXNDPnrFaF#c+xs$^pq*G4Rcui}zytS=pD zAxo!ZqrRj)opf&Ypy9zK8`VhtqkSR8#xhi+L$VDc@%(Uy6&J z6ZPHQE=?MJY&8~vl9nrsJRF?8QJp&vgydlY(lzNyFlTiHzygDs8a zO^u!_rR94}`eLK0%D$5=oY5Dmo*#`FQv2Zolf6(xRGHresUyUiNkWZ~L746w^FF)qDhgCI$ zDie5v@z#wgh3TnPsr9wNCnF2ex2j%^bZ#rp@Bi$$gkixsgN*%O^v|+7EA@xM?cMN< z*@+ljC0(n+g7BjOplRoxLPyb)3=2OnGu_AAsHrP(@!Q^aa}{7sF%=c2@7YF&kI37r z>K8P_LKhAVS@E=AzmTDEhQ0^dk&2yhAj}uba0F+u%%|S0vt-@<;s6&LJD(4GQOE1% zPg@fkf7dd9kY#yAY~G-(b=gZT-^Y*5&a!>_J4&>gVjWi+B`>_?)~a%U3xHa! zw8RVW0s$=Z)}J4*o0%5HYey_3y0P%L7E`kG;1FkCpQdfWILav*t9Fc4#n|o1mp7~+ z-np~9*`0(5g6iClL{0SOb%={4hFh=l=+#nrON^ZY-U31Kmi)ICL<4UKky%-Lf$Y)) zLWUp)k~l5oGNflurhCpt;b;pH`M_z6v0#$M1*(WHM{CN-TAi>?i}!eQva%D_Pv7&- zO+I{*<_gJ#tX3V;#+=!%+dG9`f=vxhJMh)^YJ$gAK&77$D5Prq3yuBQ5<88}AOR-*f4 zy)f$*AD)^_t@uY)ZctKZtVa>VCN-~r9Z#5X`gjYWM!zd#6~uVLG#8~0>PGskjaK*?`Q}gHM*86P@WB%e+1@3?b)f5$r6r&2Z~i26$_PEIa4xwK@_Ypz9qaC>x~s@R54ni>|Za#v0EEPC0!=oQDwbB@}!?D4Z% zz0%x;B}7X7>xRsG^B2URBn|$0NS3epE;e|@sA1p8hH;OD5y z4?jn*AI*gBJ-v$)Cw@_^l9RrWCzD{CZ!S2Hd;jc3kxF^sKLMhE5F+6V={fM!VvRcX zzAa)|u}V6Rj#Z_}oepaF5_uKQ+oi9+hP)*MhKT2H#P5!jzrG+W`hwr>OinT$%nu88 zSF{H0b>SAGt{LDp?LXUBq;js#Ad?=-lMu=`}#72bq^EQZ{NOL;v z)n zP`##I6gIpMP!4{xgBPjv_uEG6t?ygba$tqe>mLlkA>SQWal?aoH$891+H-~vuxw}7ogB{1WASR&t}f__j$Is>VI0mTFkU;Hp0W4wJ$U?kd|w?) 
z1a-g$x_|W;6qyD(*+Tn)`WyB|=3TILY0Tk%&x0&2J)s8m1$b}4;aC}nW%Vkt;DT#+ zgD~|XIiyy=Vb6tv;GNN%`*0~mnn{Lv8q?r~IGd;Dny2AIacyJ0U{X6getu$j_O>H@ zj7i#G?Ae|i#C4)W8rc$p@@(}4Sgivs zgLIZiEySm>e#(a$6FbXd$PW$W{VZ$pUQ{P;=3?wCUw(V@_hW%fez99T`JvX#Q@|%9 z^N%C{yr}X$R{ga!&C2Db(ziF?M8X&a)@89CdqE|?e?rmMY7E5d+Kf+Q}KHs%Q zOaSu?)geg>f@Gp^6l#q@pvr_h^#vjITDC{;=(ok%Z@X!m$!dLJyW@o|9rOcCi&s9U zfa17oqYapsvit9F_1|jB$zHU>RODK;RUUJVb{AA`1nPec+u@!za3ZbL(o=H*NBd*& z0ENu=Tr_L}9QYP7vdk5q#;q36<9pqJ>n`sABb(9l?Mpn!@)8J#zd+2#i}~#FBhuTOM#zNjR2hrH51DB{G%q(;~%?jQ+D#?#A9bf*A>@- z$ennI2Xv2}7@utTJ3I!|v+IYrW`rjmjXcY{u=URbZ86W!;u|@>f*WZF1qm zl0GZlecp2P$t=fnEZd4oOBSqjEXd5=D}BL zquo=UjEn59;3p*5q=j%pb&>Y4-tq|?xr-+r$L_yt@w{SbxQmy{9_a=g5vF|24itTB zUJ#^~f9=H$(iA+$JV-C8p)Q#OEd3YzPGLED8dEU(*^#q2tTIBlEp9>_IV*qmxm-gN z_Q}tE!;|!DMNX{!c|9!&ge{Z+@PT$KQCNqYaAq+45e##y@#r5JmQ-uxp=4N9tr3Sl zqAkg1N*qejJW+PsPfXL%YuCdQ6?dxx)K9>Sb7-v&9d`VbdDwySkK585I)_91N5h1| zltV!!4qTYXNaP_Fv?NW7p_JFV(zH2=1?u(Fozu%xo&wu>wO0P)eNa{sClnQ`L3o6V zPRhMM=RiT?y@hJGLwDrj`4QUko8>pu1NFWH=;CwA z*V{AF!1|Yw7`PUMWrvp6$8RflP`Zag)=%Hh(k({1;@bO%LJATmlpYR2x^U46Z{45s z{vjn@@y+}2J90P#=@wl)KT=zMtNccYFWnqX-(ji=*7a}k-QJD^2W`qQHBug^Th~5b z@y1`}3UAy8VhpE+1nLLIoDfTnlbl|_XBKEB+_3BHPE)IP1~StQ{$6E zfM=mu-xdh)VREZXGgd)6nMQjtz88$Oi5+L&u#Tfh!_rdT9|FBVZ7hOTNHMpjDUj1( z&QX8iuYz7lfe)!qv9f>$0g!Qo9^O0-qpPi zd>Bq%ZOgQ_Y?bnKD%{fC4{^m`V;O?80vy*c5!Vz}%zNA}D@yt=n7waL;xr>ddhb!T zHM7j{_FuF@LyN+AD;A)0AfDWIZqJYaI`a^AJQA?y|$6S^^? zJL1K`t{EmKHPal1Cxm_R*xqxSl7BG`!zB7L=qO{VmQOqh!g26ev|rC{!REu;u=<6l zin2R}k^Ec?=w)|aH1JC>)g8UVuX^3EXb`}b-r-Gf1y{0sfW)iK<#~}AmSBfnLN2R# zULeDvU}--9LR!MyNxT?E`^P2KTFFX=Rn=O78PM(J!HSngWCnD5Dee(%xG-HlL0%@f z3}S}L$Z>0d*tHdUO;Q zN3eN#>(S^=+@}!W#v3zR`9QH(8+xbLHd-<>J}z)BATIYxaQytcE5}xS9B1_?d&t2- zs5IE(2U{Rqh^0a6Z9ZbOZXQHbJi^*0>+XvN?pb5;+U2sh>*FBz_zH4US1YQH` zzGXrooQ0B+;^6y(g3WG7TwO%Wxx4PhG=;8I#T`cpffmHf4=gRAFo-Mx7 zO7xk(LcG>9rWIdrar)ZYmj`LZBTz%s=BK&0f@3G&T|d3=^az&jQX+L3>WS*3&TN%e zGC0>v)`bqXm7=;L(j_+c3tGK(M;yzj7?YIKN~@}Gf0~xO^qO5aMNVg95U-|6(P1f{tTZuIc^TgcNY7I$*k>(%-^2rCF2bB0Hnpc9n zHllQYQIZo+%8ul>SF5e#MhUO9@YXx&`N!8Vtze{ z?m~I|k^gY=m!muTqd&dr;};-26DHSV`O_hfd2#KDjKWK!?)@tSI##z<~i0A7Y@|gbve8sk(dRPa(rgP&}J_u5R0$~U` zc;*B2ukF(zEJL67!lu5{y{PZ}&~LFnh9#iBkl{o96o&e=E-ljx)Q`uOV0N%P>d_?J zy*EF_Z2>IH(!1MNcwr5lIe-z(;|Z3TR@4|`Mc^ftB>I|QdA!#sEoAL{&NO)DCOTTB zx=TbGCnqZH%FIcf%n6;$$%+|m`#V~GE4a!wL2DUQFK)p9AfZ0_-%APxPnkxDttZN0 zy;cfG4OeS%u56Nhd@A>n@Rk{fYXj>C8u%T!HmMcDSdSv37)r)cGRh%{%X+0&=%w^Z ztz>QmH1tZ@#F~!fN$KlJtglb-_I2yH(f5+TI1Zw2-+GMdpkg97fvs4glTj@XP%?%t zDt4z{0)sTT9i}&rQWWKJF5imW1W?A7kWpa9Rb-U>DUZ}qDZ8wrYQ+%BBYK?_+zu~W zJuWfh>)b=wM}Bz=sCfAn$L`PRN;wy?c15BgY*?rFzYM9xk8X(qjo=kk8%PdC(R%$ku1P zZO9Y$DXR zoDoN7;ERx6Y8|VkJ;Ih&$QgwbtpbGiAW?Xs;|tS@lr5i9HmI+@hYmMTJ6d-ywJYFr z^_kyKGR#()=B(9o_SCM~0pqH+x@t8pgQHM|WHTQg#?p`aWRk2rD z*2)YzX%)4JzFJeSy{(gU9G#}HHo-oAB)2E{J@x#oy8|`bRJ^9>)uA(Ec|D!blz-^= zTb$|;B+OC@Bw
9**%s5>$vzu+EskGj^QK_YT)Z!p=9J<+rQ|8)18AD1YefusBjhlw&gi$H5ub4rj1{K$I9N%vlQjI>NFV>CZ$af;cy_& zQku8DG$VCcTIzb(KhCpMT%RDWOrLN-7QlK z9}!d6DRfC;H+A{#=%Y&hWKV1Ugg(NI7CsbmP@^_obcp&NNDmR!KjhvN-?*5&o$r|! zwF%vlHcJ8EC!{nmwtX{8{JhTbzGX{9m)s4G?Jg5y;=Xu_QxA(iLz} z(i+zgn_O;89?u*AgMWuFm^+o%WKH)>PEMLU=b{EW;$USwn~&g4Pp}@n5|Zrpo_=r=xR+w zuwz%=mWWWgR1*SJatJp7$+F zPX)U#B29PI6KAU2<)u?GyoC`n!H?q3N|t_gN0{Y}tI#xscnpppxr}0(@DPMVRMBu| z6BC+fWll`ZfRpK)JN=jkIk#<_9@=C*+QU_D!J}dTQQchCsJaQN{=0IDd`+Uc3(flb~Ap{e%> z(Uc3FY-^$}Bt|uDNLHX8M#c18`w8m@gdSAK_R#4{B}*POM447P&)P$(W14({G?P$I z+7jU$a}+K?E7M6|Fq=#^oW)!r%_N**RbK3C0^w67P!)5go{ltqY5z}k-stxDy9aaw ze^*uc-c5MSOxUOFk9TqXSC8K~dl%znh-FLHVn6WU3=l3cj)&{iXo@F|-2U*4!5nbW z>j|L^Wq!hIw+dEcKv#hkN&5EdNBZl#DmT`vTxbR(XN)(MvfG~FvjTJDw(?t~K)(NM z{3nTdu~t5Lrf_;0SbBxZGk6C9+l`ydzdIgun@?pLPz=6=&v9lj z3W!sNa7!LyZ(`&Jh`Ls0$dtP%23~sqp#s8PtVc?pd*?V$MQlUGd^|mwSP{!+)cN~A z^jl)47Od2Yr*FX!QyC_ox^<`zU3=NH3rruU8MlIG5)hYD$Iqww>qRY(@&3j`GRU7o zgLJSBAgLajCLBGZ@Hd<-NRSVlNvxQ!#G^}QVJ_$LGIgc4Mk9WYp_VsRYURLIdiR04 z2#6I5Zi3f5+Q*+Hu4Loa_O4>Tvu+|d?b)ZDxKjL zJ<2jYFYR+}+?Z2|V;J+ze;1}Gam?Ny!_lO@d$1E6P1-*)(>_p&rYjpyz3KgP zxZlQ&d7LcfWeo-&mY2s8WAxCo zR*%+d?dhZgD5VZ)T*oB&jgOv9_RLnyDBk>n!m%W?RXqC;hE<@0oOy`j8FFC;xJaKq z#CBmW;xGfeq)#4Vi$gBVK!K|tD(R(jmdXTc0_RRpl~*RX6V&Xh!Lfv8-;n+L4+?u9 zu(Ql`&{ixp6w;HRI#ZM2Nl>$IM@V_s16EHM)>N3-vCHCrXy0U`Uf6uqj4cPun7O`h z<<-vU1IUG9|M+@t^l+S-LU4<+bynP^itAuJAHi>z->J({o3`fVy_}Wx@;3E)t5vP_ z!U7@&-ss>{rs0wAuvatwX1K-T$O>tB?MC?*irrW5K0Wh#N{%-7Wm{P9Oq>ftkQlW` zAiq!KPT*L|`MTWygpwA9MBKvW_euRu_}BrJJTvE9@zL@2JZ0vjE#>MhHDWz z1+Wtg4S+58@kU49GH>pA>#03!<7Te?#nHFW=a=7@g!?y1l#0hM*nf5=1xs!K?s5~L+h7b*&?CqQ@)Z4wJiC9wn0Ko^7Xi4!9 zja6^tyYR-%3%IJVub;w2*nJO~{Y9>-*Vm(9-}w*+hB};EH;3pOdKj~N_^ne3?YU-c z?=5sY{O^45zyBY?|4GJu@QEgqjQrmQ1Mz6e|D6u_&vR9wUk{=Dulh;;R{>SD4I#Sb z5RG{8M-|cmm#_&>FDtbL9A)m8ZwRo3jIdb+Y`()c zX>Fm=wy3_g!MJpYr+NHuV&r`&K{E)!OaMgqa)0BR@COE4l-fSHjz$kgZqnYwfE**- zy@649iTvG-wRf<|0cV#Q05DEM1J91wd;{Q860|v3jzL=t zLeQNXpx>+#?%Wt#&j1OJZx95bkwK7d-xzydNuU)DNB|mnf1@ODOGyA4C2;eGzKR8w ze_(+{Xq1~9O5p1hI7WhotP9s~94{ySABEos_2V5!P;K*X;Vo#2o)CPu(^oW$*h^EW>?d#&MYM!u&-UF0|-xDZxtaH z`V$6om8ic}tU6u68*^|aY-kUE6ES0v$jluvtZsw{xBh6z{J~OW$#cZ!3upVLCamd={&nuut>a^F#YulAOlDmch;fwLH0zHxI=THUxt4CXX{)9N#FU zG*%TsHj3l2G`5kdT0t9vCH$r__mqH#P=QFY0mnDJn)1WGp9oHBoaR|QsyQ>}Czt1k zeTB-H*cdB0mo7*u(@PK9!XqBzlI+@VsUPNt+IX`s2fn0@{Af9$QF^u!N%Cg!k0E)m zX!_bonu&D5bW7=q(0)E$VN83v!Vi=#F54;TV!#{I$OfkI5P?8D0)DhK0um6ACuvS& z?h$dpO_wW6{JxgJca*uk4~p)_j=qULp%T7>+#l&%e1s-^#6~7gfz}11arSMU!tIQb zv`Kfu0QX!^$)eJ3(9P+a)7jj8W)4uV_smr;nn&Uw%=9cCyCG8p|Qa`5BuuC zr^~;)BhUHL`sb`{=Nnn;ld`N+m3H3wzAaRf+0d(L^EvL?G@KM+i~kRnLZ<6Sj>p>_ zm40T(^zZAh&8%A+$Q_L{->-_xyf;?G&2NsMzp$Lw#g283(}HBEbH78E_+r4AZx)0%R!DNdknUsDO+yIo5-5;5{B+WgznHLZ;i+?967w9 z=v3uN_1j0m$$L2D{Pr6gtoG+V8=1_aRBOSwq82|tUZ~k%O&vVb(VidBBf))+`zowG zFh-Hx0NGc}o(_7lX*n@CzNT8{25ATX6{h$8G<>eOb0$jqkZg^ zN|o^Gaqyn%s6Vb?4-KKOpl|>3ddZ%)5q0hYSa9}QUYaCKWLsc1Zi7(BYp#q5w(PbD zldg|omp~w7A!cxFTqJJ0lxg6BtF5$&hBL3BQQtccN(b+-?N@cHG~i3A#oM1S?mTJ~ z`+fWSWQ( zA{%NMR0dQUR2*^yx*TXbQ-oEug)ddEu9;GMw2aSufuuxq3uyA9+OK7&jgqqcr8&4MJo1aC^=w4LuU2dems@d1KU`m-z z{{02W{P4qbdePAjUdmZrsLy)S`C7jfPw2L$o3?FBPqn8GF4zb$#@nmVe-Yn!IK49c zdP2V0QnT?&eKz0!yd$J?W3lScYCf>!bPstip13;R)>gTC8_)1Vg;(bxdWOeCxn&v` zXf9^RavY}Gu*+Hz+fOY%dBasPOc-**9;(G)x95&MM2A3fKBJq`cqsqv4zB^_g=oDC z9>%YCng-|iLm;8mo^+GyhK-pON*G#7X8emVRQLW=_w9G52&cS!uW>qywS(`q8^m=` zNP{iW$0MX0h>ak^3zr`V!#|@m319hr7*{o-?fbYyy-TgjRtGqo|AiF_UO=80;7Zit zMG=QHD=TXO4B3eRtS->+HHir8{4OgwC^1nNmq@H#48JvTiHtai^2q(qd=nwvyZsHA z-G!FhE|s3|+5M)U-%ah%n_y9eP@VuLyoJpuY9&9ATW(2+WTMOCtsaw=z!F?(MlH^# 
zyuxD{9`UDBM(W3J#O$Er4sWy`yE{+wx%w!E>i(C^S{tQfy(#OG#Xq5jZD7_MS& zD+KGGe4!D7AnmE_3eu+q8|5F``tPE34$1h-54SOzFlefN;@aAI8Vm(m=|W?U&Xin~ zvrd;f9n1qJYv0Y6=V|)wYESbJWXW^t_`JN0+jMe0l0D>y^g5Ec$m$*|m+OtAiRb@D zBYzswXQUlhSZ_rc>0MJ4u~2QoYk&5n4&D~lqLEH{jo~ z#D5qcuU(%!!3uHj^oFofXP`&=f!@eyoqek@O!F}V0{Je&I0)9jq4WS;!eWL|LM0ha zOP)p(tC5qSkK-LK^s$Nd`HQou{Bk9m-oG!_3$N7jS|3<2RLCbA=geUgPK;S499_%ZCf+5C+G!3I2Qxooh;;(kb=R6)KlY8~#4T4J^!dE>816G-pBjU?B@VBnffV=4g;(itH=roZ?Oa@*oDJZ`Dgdwbyr+uUHn~2ymVnnb zj=k)i*IrlmeIGP?qjl*^kt?F^vk|aYQ{UYWhPoitXOg;UMKIox>-+sckgNzlOT1?- zO(n+R;&!}YhROPq8fRsK$D{RBpK(u8xh8R-#1(7YlNd#Al15(k&@{N&qjg?SNVPV9 zy1(I5g1!vb(EZb*HcVX!93wk*|6pUuyu2z^{uJgujc~xgJ;hGFvAsEAx|sTbd(Zw?7dE zIADYmVp_<6rRne%I<04Bxxd#@9;z73!VzWwMK%T}5yg6Wf2|2?nmyT(nI`+&o41!~ zt&y*pcVZx@ZgC)C^%mE&iRr7dov&tPBme7P+Be1;xoQ$(BJ*0ABWT1TaF7|qp)HBW zSYa+LJMk*wo9tUQS1CBS^W3#CedhOkI8Oc{%T`~{c#f)d!p$~t3(vSpS2?Kth3Zdt z8bnns-4Vl4V&3+0MQ{}j!Q_uIDBPfzdtTsEoI}f-1_y)couBkeS^jd8LHe?ZFNj}U zo@Mu#>{(-@gi8=&E?;WO+8h=Im($gKlaV?Ed@GfQaP;g8zWsF|B_R= z`LhH09@%q}alKl{wkw;*tMc}gXWqzrCn)coa>v~NkV7x9>0kmMu$HcjQGSXwc1;xe zEp{c7Tpm?}HPaPce&x$Z;%zI%`YQWKk!!ofxhqocdI8lsNa2}Rc&y|*ExhrgIf2m| zvTHOB|7^#&|41B{Z7Q{=-YCv*fm~CZx;r`==5vrKM=7?BFknwQ%1%vzKv?&JAI<(Q1S^Lw4It-%D;%pk0QXQ{xGY?y*mKz^0+^*rbZG*59ey@C+ z*}%WfAS$-f!>|FpRrOjy&6x(DCQjME-0Rg;sKQ9Q2*o{q4#i!B;;!{z@3#x%LmBpw zH(peoehC~}M-S=um~^%s>JvM{iy<$y)|I~Q5Qr#Z1x)MtaJ`00g(R$+?DX{+>FdBJ zf>+5awL;wuyWo#EFiD09h=+B&77}aeYck@6ZS7DNl6CQKj?{ zw+5&5oL>-`5;2!(ImoBG!g8dnBz?mnBP2GlL@F#EXBKAj)3qg$8{lG;_SRM#4iRl6 zp~dSY?)N$g6ObP;P<+yc^i7Z_KD9-E8D`KBzh%cz*q>U8)ypb^25OETutEOt)J3-E z?Y8C5A115gwiO_J{rcl1@d}tRu;}RNRe&T+p);waZk&AO? zt>w75-MH$Tb|R=+C+)8VGbbMMJK)U0+9_-G{HzTL>x({mw}~cI14)aC+r=G+*l8xa zK~s&b`9R&tbm>pE?0E!@u)DTQTdjKCkb1UQBjnctbF>>OPM;MEYs=8GZhEsmL@0Tf zvC%(!W58s4_R3ksdU5~5qUj_c@DICi`CItlA#WOJ$T$^*t$DE@q_!AK4^NTrUk3lr z(lZ}A%RhXPKQYkn`V`Aw%2)j71biWTuyRn=+^7?hb{>zx>QL~^yS=;Kf+k4G7TBJPa8WZ(kK)J_bm#_k23Tv@*_J9PnPJULxYMn`%9*Z zRSySZ>q@6VG*+!<^mCFrB79pr$>1`m;j-09gtcPlfKwK7P#WvhfUW7_ZM-ux9DBHS?!iCX-Z*r_^n5OQ0^Su~ws?C^bAHE#<3Y2#*gLtvoKz$=FG@*%jmR+8Y}zki53Fti~sL~+Nr zAu!kz7=&0bgRzW4R?#gt1QvET^F1B*OqXrdj5_}kHD^mz>mxbH+v*A?Np+%H)$oPd z^9jW4$HxyTnWSo-q)wc~StnsDW&NZTk8Sd*l20BucPF?~IcEXBD)v>gug1Zvn-f)u ziR$b`O5a~vH~KgSNNpJNBs&uL2NhPqn*22$&??W-KN%-lj` z4ELrdV0imYGpMj310V^zBGz3I?@k}d4p3gK^F)puu4||WN}Yb?CiddjyW9v&t@osl z9O|fdRm7K6#8%^FQfV*PGQ{z%X^}zbh+?+~GwQTr_=A~6Dfu9*7%(cM{r~4Eq+iwM zsCZ*pdO}8eVhL6lBlutJZ&}lv{`9pc)MH0+UXPQJu9Ly0b#cgj#bkH-v$1i`?DTk7 z`mmDpq2+%LE~%JSl0FKTX48k4mpDDAA2gh9ZAgE{lRmnV{IjoLtT;aA8ilwI*A{*fqWP_${jfx z?0oql5z8v#Ga};)bi%OKNO=gn%&`5BRkagk3ii%iIm zipN->Yp!^s`P1P1kf`FW7TI~_FTvT^*5zcCI&cU_lSnmXOt=&bE>;@vy>iE2IWRvMjd zC;IbY3;7YJq!g7}<>U7v-TlzTJKK|$KSz$Huers+K=4?p$bATfP zqhAPHSA)KzmRzm$?qQiq+T7YtWlaJd3jU190eVTcCm^wMNOr#ge7A2392(BvQ{V@b zcpyl`g$?HEgQi_Ff7j*wphTPH)~=SzzA9EpOPbRbJWa6tN2*5t56Fe_oYP{Hoto^acDC=a+jrR5Kl}FOSFkUao!9|Un)0u` zX$t9-Wol>Al)TI7O5Qif&m)vzMBb&}Ki;|B<*XLP8Wf=V8@>n8vI1PEnw(qL&?+XN z3ad#}&TKDSX|50MXpmY_NL!sH+SyMHw(@y~{OE2#?FdXped=P@LxP1V+0rj5~7W^$Y`KX(FyL z_tDtz+rV+(z0O--;+gx|9&KSirQX0VLdb+Q#p*!A=%y?FN`0bA|NZ-S4g5b_1Alnx z$stcBT9U@6SZ0nLIB58gr!28UV~54XT4D!}h#NfOsbT+p(bHZK2gW}2WbBZEN$I~^ zuxLU0%i^FxBP?;TfG}3funZb*85H}1Wk}qM{{oItr|pNs`HA4otLXpE^7V#)5$4}c z|E__5*TDa?HK0*F;1Z5;7S3W;1sw}gkMavX#tq{h+~v4`Xt}N&Tsba}+s_^2f}RNK zrpH4y zo5(HT-au&#_-b_hRD(DR4vW>uL0j8S&T^e%k!#j!a#h1sW6V>9Tt!a1hF<*#*QN@- z3TL&(|6jWL{YzI>fqFTo=lnFw@RVBHv&%Sc05=NXA?!Pf8>LbAQ*i_qx#LE0Z5j>P z$p!Y_uy-%udU4f#kQ=lj>Y3w$9;jlWx3vvo-#GR~2`E~SS;cgvUHlbW7RPE|yHnA# z9&oP`oGeX(3cm-cKbBJszk6%~_m~SVXJt%KjA-}K;@9ExTqhrY{5pIlck&6s 
zufr#;lMh6bS~`5Db@GYFufu0XCm)hM7gy;Z3KMl*az+rP;LprC|J+*N96 zRs%pXR$IVcW(P8nJqv$khhZn;W~s86KjAXDQ%`wS^MnT8s^09|i+#=PYhvFZ_ATK4 z$lX;b_!D|{2QUX%q;Bi##UJD{LZhQxs-Mur1DkuZZ!h*Wv#*JLg9zg|&OZoWH5a5x zI!3lceAQe>`8!(eO_llB-VY%8hJAY9=>?w{=~mfJlwiK5lo-I9%kBNklhFs2u1NJ4Cy{a z?!F(T|F4wKKd3LqT}MQpI)*`=%sHVPXL;_&W%0%NxiXPI<)O1bnd$d)I{8r?DueIW zIV`#R;E{i4JDI6YJOBQbKe_pof9JqnvkyMy!`DVs*M3kx7v2}gmtJSP7cA3!?gabi zeguQkqp*ZKKmGY&kh{s4En$VyRyd zJYPLid{Vx0l9}?*IUk+v;KfB;?mETKdQUVzs}=KXLp~O>iz%bN985e`S(+M zir16MPvs=MAhXYoVXVFUGD33n-&*_vd^-$6YuCI-K zI=OdNzVtim2-5!Zm6ys$KVN46rLe!sao@cT?<|h{>Mcc4;NJnregN1FK>vjfvy%|8)6ap1*$s6B3arg-pF?%> zW=3DV`|5$p?)%ZMl74E9Mx)jFDHaOo{jJrGS=(8S!H;&8KAp_;BRl<&e4Ed%m??ON zqA)&x(tSU2r{7QGk0HpN6p&=y#!0Ygn2{58}o#UCYgC zCOfC@@f~zj&aX&EL?`qtcw8krDskyu=;%aONhd%L)bUi8FZ_~=hcmYmc4(?^CUMZS zxKTf-;|ULvp+gJGVY-Fm5gv9+>~lv9b<&+OZ_y*xD2+*emc4R03>P@K4P}tCd z{-fg5P~250V0Sdba4i>)eQyF(@Kr6RexT)&VIBj;FwyPDWx0ESiqwl!K@q`X>9OA2 ze?YB-+636an%-OpRM0U$E(|c}7fa+0b*w5+)mbqw_MuR|afERFQg`4klniwP>K4>( zs5?;2PUozBlMAi!u041|kH0~@RXMrKo~yw(7(6=;E95B!LZf(r46n0uX0r3wfL z3r^ElJ zcBT9s{y(+*Qv5mHSD!zp`|9)ObYK1doKEQ;r1_Wqls0+0lbzc1%T9L6M;B1EeBsd+ zXtNGG05{!|-Wohn?YPr}|-d*{4%^C_kOcN9lK}udMeN-nPT1EU_{v z;r=&!?*SLp(fy6j-CbaTm97E{QY4BU#e#~uVlT1x5{V)jTZk3=V(;Bpq7m%9_lm}v zsIhBovG*E#fqlQ{&Yj)M3Z^_ye((GL{C~5b*>k6zbLPyMGw05oyUP^%jOt}JS zRXOqb#+H`HR;53wOXYcI#jnukGC;o+`Yg2$uKV*jZHfYHk2pn<2iXRa31pNq0jXJL2i!>NJ)-xsjQH$$?ddQ+?9x-l>>Qj6uCb_zBTz9G;S20vlr|= z?tmR67s*}nk&IHPlwXRJa!I+Rd{S}gHOk+DjHBeLl;@TTpga)u3P{DISL_e=PA+$r zJf!S$`vOu4DG2Qe&r7+zh3O<`$r^J{)duO@yYhW{NaknuS(7rJtpX(jDowluh#m z^4n_qYUXG*X^Ls0HH|bqHIwkd*a^*JxCSO|qV}*ho36fYx$d2=9;?q9u!eZ{*O)b7 zO<6O1n&Z;~pO*Nv!lyMpU*XdRpC0)1#HSZNz47UTPa-~jSwGevp8@y`#AgsbgKe{d z3;QpAYaCpB!pb)9J9G8s)ie7xFB|6L;y~m3PXg(R&<)Z6_5Ckt1?db$+DiyR3_uJ+ z6!}yq8Zkg5os7jp-Z@wgW#2D6CMEtA5=-`u;5SKhRV6N(v>Yx9s7C$59O<` zZLMtiBF`R=NZa!w>f1v>hoA`dc_K~a;#v(wJ6b2iH9?Y>y6<$YPLsUVX)&`2Qn1HI zG({PeQ(x6-8cTIr+=~br_V~njQacJ%k8n~u3ikDhKGh{iLDVHkf$$N1L64w1P5o7; z=~|u6jGn-Osxm!wns`yCiEnk9_)+sqWmHG-n;AXPM^uyRi9S-=zCQIwUEjW(Xj2{H zgTg=IN6;rabgiaG_(Xpw-=03@sq5R9Q+ste$w$z`Wv2Ml@)UImQn1JWSLCDSpXjUk zq4t6X<=f*IWdub#A#X~5u76Z6>Iy#X@e7;;DX8gDI7wk^xDoRV&7WR@Vk#olBmUL#5%MYSSCtbE>KD~F6iBZ?@vu@(Wz?R^s7^tYQ55)* zC6`ehl4Wg_Q55BrPeI`Cf-;Ks<)TmHP)1QrUz8E9l_;Y~?MXHiWCHji`V@Jt#X{{w zxyVyP6w86|ih7hGbV4pv(v&9x)m5hjO;LUn{M|y-6~tI5*FLzujO&kf`NdUr!c^Cd zRM(V$2JIdpR<^q)oG%fnKGhNU)wBd(1T`KSiS|?JOcCE0#*LO3B$Q`Eq`i3BU#IP@ zKTBJ*>Dn~yN9_mgd+j^zTkRX|YwauTOYIBoAKK^IXWFOQC)&r_N7{$l2ip7Ed)m9& zJKEdYTiTo28`|sIYuewnSG8BPm$jF)7q!1>FKEwe&uPzU&uC9;PiaqRk86)=e}*@F zK)X-7N4rD2MY~?RQoBeyS36ZZLfcCltF5dxYA(?qO8l2#p&ss^pNgwT~ypxVdQBo4~m1nQ2343GN zZ1J(QPxrK=qCrmkbRU%BZx%BI30RU?6fnNCqw*1yTS@c5g?heF;}77rjAbY|*V@r> zMGv2(&$T?aqe42_j(I^kuk5I#L5c&VgOI4O+Je1wNd7C!LX3jWj*1gT>u~yDjGWDW zh*tj=tcp9?)poqw>)8KG>8$1YZ%OFCfp@#b(bB+fv@_1i|JPDk&)=-&K_mdfIB0WtYTZvsQYa5!O zM2mbV5!Co7Po1WIQX8sIxYT7x%6S8_^T5PKU~M~ zrv*57&9kW$VG~?@zWhjyXY=0{W}7Cz-eMf+`9;Wyc&%yX&GZV-!YmayzAKtF4!!lo zTyJF?=TX2rHG6H}U;S$a?hMKkIx6f&o)Y=Y1@;$`B1#qOP<&F!^-;f;xgGtgyv3xe zq^*)x^?CJcHGi(Xrq0BAof?=Lc{TaH>C)z1S{7`5@2lk4Ms0O*YuY#M@Ui2ZPNln? 
z>e}(!_X(rBXYV<;S7@JwiMjjD>+d^o{2?K?ULR#;@kLy&3*(l>8ZX$`)_EmVAtt6KUrQ}z1cRbQt@D&sZximXc~^b(H?v%p71;Kw#hIF0`hDN&JO6qvk!8H{ zr(L@I%bpKEKJ5ELv$17&_^(*pW#oWcwGXrkEvSu-HB<~({di*9qhrUebehN4}_~utH)@dery$C&3tVE1YgFbDW zbbqkvmqkmbj~?FbRo&YK&t^Mtqxer@$v#QC_@33`!WubLzO>~1mRTdW&S-OY(3`+R z(dSR+`J?1y$F(_{#`o@>y=w6$wSVh+#O+;?`-f-UA3bK%(1Orc|MULT}itQ*};S-jU-cC;YQ_?YmBzS&EO;X#Nk zG-b2=eqcvypJnm!e$s-3Zqm@iUDA|(X;M=>Okph5vd%K!GQ~2)@|~rPrM9KCC6C3^ z@-h8x`pNX|@D<0U_e%dNy=r=q^qg=79;BU3OHEsuHY;spTJN+DX)V&~q*YEUn^rt6 zBCSwbfq{hpiz8hb`7Kc18}(+Pek$5MfMb~xc&Y+lFW_AW{3k)@W4b43mIm!M;9&@O znGc>)6>BH42C-T;Cf0$?igjU?V*OZyuZywLZEG>h*KurB+i`4n+(uTS!(ld}qfYvx zi%Hu1-6TnqxKGN_|E2WR0E;wqfK3`?Yp<@*vWqJm*!dMM?DPshHgiQW=Dea7i(MYa zb}k#o9xdI-8ZSM}N-fn%x}_$m*wRT-`DOd0BFkS&uB$B4_bb)-#5LJ)x(_3Jo(4!Z z{~aLRw>$?70=y1LMt3hDnY|>y1alt5}D`njoN9J7{g8AktYu zS|O+{tri5S9zl^W+6nvuuUOASxqxUY2U>L~kLs%-wWBnxc?8w#8}T5bpf8|3UBM64 zr$D$UPOE_q4Rzs>sLExn{MM0mSz=cce9unmd9_k-KiWG=GT??7e3TXum)3r&`zvTrIn|}K8Xx5edg2;ca$Nz{d{LgjN&XrI zqW5hmZo;sRF6s*?@&y$2 zMSGEN4+UR>9_@iq$O_uG7x=S+0>5Y{)*q3!hoZiKqJM%v0XdhpxkdG;jPTpvgNb}< zV_#;kZ$y_O_v4(#z)_y2subwdWMNga!#F|UI&^>7@ zCDJwJQJEMQB;{aFU*vxRancSqydHYyqIYFZcqzc3cVf`wI$Y}!4N9I5pf6$oq8U@k zOq|eR3Vh7SvHu7>1l_T9X~D@|i=&g?;_TvTpbgm!0Xv=GJPP9QV8JX^G7$|4$3H2X zGBX4me}uDyFPsO4_z^M`GYiNN9sK!;TPDQ}8{gPimn3SIS$UAByTyW&krte#l*{e# zgR6|~?aHxH>)_}F2C})idw6V7}xQLhuZ z<8zKH9BHgmngthY%)&3WiL|!K@95~Z+Pj-yub>}tpUoRk*5XTUE@8j&J4I3Yq2CI(NLkso<=@UkkEaOG~hjgy_hy z@JjJ{8dhQ9V`|2Sr`Md7Z}yi*^G~f8Szt>;R%lSG_@YE7k8>T5<~ipD@5Ez;0&d?bs;qLbuh zhORMd@hyRj45?gafpr0-Z)G_`3Ta6v<9C#_l2Q&^1Z#wvPM{7f66^>*I<(Xv&q;#n z%V`IEI}$#Wp;Uso$4m1%D(3+_0Xy*H1N_7TL2Bg$%xDiI;{^Opl#6^Smy~)E()20= z^_w8I=a7~r1c}M@@#qKnOsR*IT#q201`!p|(cub|6i9d|Eg*V_G>(@k_!R86NxwIo zu|1e=4I>$wTAH!BCY&{_&RC=Rj8$sMunT}W)*a{M&5SLW#Mqbf@h>`?u~&K&dViab zHLZmtn9SDdIng<;pIg?Z^WCc9jZbzr7n)Y2nV)mZ!7ZP)7(MXt6aDXHq->G7}5!DwoaNjy8wBqQ1s_}cjbAC7C=9|!22P-@}St{?H+AVH%9X)OAnAg=cO@CmI ze|p$_`0VQQ`VD+w*wf>c7qcP>(R z#NjVSjL&uRo42*bUQ93B_{SA>Jr`HfZvUf591K>$>w1-v-jp04_;%x@rU8%JG#YvJ-LpY0@?>lIPMa8WH~ieZCLgwrSXr#!?*Vg9?K{!zizRE~sy*D)yUyJw+xN9|PklDK-|c%F zzOR03s3F_*89(G3HE7(Hfm7?x&ll6(Z_4cZ^Vfer=Fozb)f4mi2Lx5Fe7xbHy)~UT zHuSyRd*=6BPI>%Zan#6SsgE8HKDsDR_<=e5X1*G`by<}aenp?<=oM3{-K-T(Bl@oJ zbgdSi@JF{H-e2c`mUiV%*vsq*xl8w~=rQbE>$0blUWZzi)W2*f(&*MR%gE+m>7}px zP1)Z2d`f@Qm;GnF_&M>{t{dHN2CZ4_T{>@@CQ_3(E*&Pu`z@MLRpa*5V)7mDpKu}k z=8tP<9>3ds)B3s<%18K3IJU1z-4&xeCQaRy$MKR=+SWy@JS#^WZN6#j$GJKuP3c&*QvN|-&Ds+EF8W0KwO5xPc#-!6N$SdDwv+hP5FFy9!$XNrg7G3`BmtVi`+$Z(s z&Trc#4d}P$ixQ9XOc``x+k$81=AFJ8yr}i))ZCWYd$#oNIMt*6*kz0F8FHo{i`{#@ z?sVs+X?IP}&+X8sjGC6{^}b`lt%JP{QI*P{X)*mluUDgTuIW{_-o9rSTE_)_*im&( zhkn_b=j~Bfulw~#wVPdgwXB$<_T!4*osZigeb_PQpoYFP7 zkV4m;kM=J9eZ=|`&ED0UGV563Ik(Ex{l>D)JA233ZLc>Ctyrq<^^5)5>~4Lj(b5_V z)0(y&nC8|bwaafOcI{nMu5QIL`#z5Bb|@wHhe0v(zwEK&{PFGKukH*P6_NeJ%Y%cR z9a-YJ@2e!PTzPiZm7SG$mgqk7R*|P2$Il#ps9493H&#c4W;@&JXkR6Q4YtAVOv+eet&p) z=@q$hx^!=6N$S6Q+uolmp0As?@vYVe$}HOyS+dZ(8hhG?Uou|_E?)a)%bVHewu`*d z;9c1c*K38YYv$;%{N#x21)inE*W7a_py;}|rB_Z3Fn>F~_^$7MZJZubFWieD|L7FN|=0;PGwnw28BO41HMO^_uqAb`>{G z`Pihyp|EZjtH*3>`Cfl@#TT{mj;deS_sF)WR`&+B3D`a0aQ%n=2^;e+ewl0Ef&*tC z%s&6^!`2sb?p*A$u61+wUi~Xh@iGiQ*X~++$0dG4$`^Y5?U2Ngv*z8&-hR-X-ugBN zN4+mO<94|Qx3sw|=>h4ZuAd8Bo|qoBzD>34344=XxyFq6)_Kad!9jNxZ#PxiS~&b< z$8K8(H+LMj^oY-|xr^?IeEU<-z%knv+fGJPIDlMw#;QU?af^)wt-E?ul{U5aB=5^}b_MK~R^*vQyHX1ni>#rK; z+V0@y7(8M8!NMiJE)_L)cA#_F_60ti)fZ{Eqv(nXKG9?S=gdl~zjVWvA|t$eX*_nt z&ue(ean;3gKhy}@bY}FzPF2Qq2wc#&NS{TQ*Uo&mKTk^7#VK9W7mxa;Ufj>;@;FQG z9hdAX6WYGaa6bpvUL!X>3c6f(oc}e`!m^ILnkKiB#(z~|>gX2V_UK)8OOqq>YADdX_o^}N 
z%Z9U76j|gktE5X$?X34f=Fa-YzYlc&Lf`Y@sUaT+*7G_Yn$P%hbV&2)uxzK!<-KvG z=O5?8r_KBQ`GcPI2mU-&lW^b7RI>Z$$T0}cHSvlj!Y>}G-XRP{int#U$gnom)emO5CV)mP1o&6%Np7ttzXhU*~`72v&a;ZPL+|Ifa`d8XjEVbOU8Gg}~ zCbvxQ-(a1k->Hd{UyfY#bio)W&v%=qMl5M`x=YKMMe1!iq+3&Xw+SkW< zaEsbgZnp{?wRMfrz5MZ#2FD^_RB|o8r1QqU1&6-d7I9(fl65m5&F!#wYDiG5=@(tO zF%DOz)D7CQe|JjW2ZvX*`K`TUI(D2@antkCJGS@-#eS%fZ(P#5mHRih&r`KjyENU-ukWS2DHS*A z%=FlVh)rvX?R@*dSR;S6!$o`lR%iRQ+shx`tUtV8i*CDiAG%v{W!oEPPsG=0)8oqY zwJrBeDe%pmQ9FJuH)3D0a^BugI;Cd&c6GB&3j$--MED$gHqxi!)wqMfwK|S%mRxV} ztJuB`3zaI~>(p%Dn^7mcA~i!UyxF+--Wr#%7qwp(sh0flx8e`NLfSn&S*X#&MX#M^ zCzlLt@O!N%h2KtcGKaTb?A4{*4fFWu0c~P`o^)pJ!o4HvJu&HCv^pE`)WPlhnyLE> zfBYu2bl-J_`>mAv*1wT=^!mb1;~i^!Rpj`tLO=KD(SP3C4_ozTzwc3_>m=`rKaM#x zYweldD@wQi;%t@ShHhO(of_m&ZCU7(W=CsRThgOI1#NL-;kwf3*S99j`Pq14&VzAd z*4LP|d(SIrT6jZ;OKI(XxUr#iAM@JW!xlGd`DMbGpOWi*8{azYkf!a@JF^4W!om6m6*yDQwk^FzY%U2`fm z7+kb$WBuX24r`tEzM56MM18lyo!7*VDDl>5Nr}DPpDzt?pEF{1_^iRdN*?DcAAaYT zl-#mk7q98RXD<`7W1vpoaK_%VI_IY${a$XWu=#MlE&3AspZwH$@R`Y-<_%une(amM z!*B17zI11+wxV~9b^+{A$vTTCwlICzP(An!evYhL?lwa_8m)4xaKp9Ck28zqJUDsV z$0p+nKR2vc;n3Kr(ZJXHYK*6HRi3ch?HCUWN`f1GCc~5Q*e6#LCczx$%=ND=gx0!bMcEi1~ zBPZ&z7wms>ThGn8${l-A=HBElm*1Y-=}KEEb=vs~txvsbYI^SXe9?kxelsT|n6z&y z?HN7rkLeX2`&K`{>qf`;7FEuUUUv8HVW;37-IpzN@;(&EN;g?r7k@MKp;5%eb9Jxk zX6N%C`Q$@E*Y!(UdcE$}uw!-Y`&ABA%09VQZ)Z=JK0W%Md{ob2YwfgKd*IhS`(@(d z;bE5>g;ezju5==P>b37GE=vqzOIG!NQ@`8lFR$JF@=e(BmjgQg<`CxI|H2o~Exil> z^8RtB`$?W{<`!-^dCfWFg2~4m)<)GXurI-|FL3g2-?n_|yuVBC$bjP`XAC@2>P4<= z4-O8G8bAH`x`1K+x@kw9AN1Wjd;d2r0;OJVdo8Q>41Kj>-ON|dOXZ#Mc-zRnd750^ zSKxeg*S-sT9a@;Z>8tKNhIYT@{YUD?rcNs-&EB@JLxK67eq)0g)t^`EkVESo({oiC znyu-kcJp^jA?25pFs=T+_JJ$qXT10jwtsP0wKp}2967da{lHNjb}p$K8aSp!=iRNl zI$zwrtL1^0uBBcE&O4h>MfbMmsD0Ad(4}n;gct3%ZP})=W9QfQ84(fRvf9we?$`SE zH0vJh%(i4|tq%99N9{Y4dZ5VnjSps9652* zrg@)=mExU4K6PrtkiV?ghT6o_;HQXs+kqbh|lk zLKv&k@kQr0k!QQViHbJ5e!n>Aa**-umBgPn7@wV5a^UxGp5*np)_UftUMbyb^zK)# zVXhffue8Z?J9#ym+a%AaKIKLiYE@)*iE%BG1B$wpD8FI*hNHpP7Uf#q;H55iz0~Rv zja%oRUg@VtT@DZZrey1{ibre>a<98Ew)6DBv@7Xpo%$rj&-BgiUo8KK631_^T|6K$ zSGU2fTdnO{%iq~Au4A(Yo63g0FXK};-=J!Rrrz)SIBDRD9|A5e$=^Bn_`{f=E3Mk{ z`fHEmHjz1+#N8{L<6%%xvaZhW_f6MazBw{v%U4$l>J#_hSdDaKHu z@`t<*y_*MZ2%Ir`)bg3%bnYD*?9%Ix-TN9ZTr#-iySrn1<||aI;II0Fr7u)2f3s4- zEoTNqzBnCL`sX^~`~6C9_^w^uJ%KMHFLw$lR%e7y&giK#w=}N%;~mE_%U9?-e0#9w zg!xmdmr5@k{#5Vdr<*jO`tmc=Z+*Y+ePrYG|$x^dk)N| zpVK@csLJRenx#&yD^fuT%zl;z z?GrXOy85_lk(p^#Qh)kpO>FtvrI-Ei)(4)AI%nvebJX#o*&J40yyke;ZU4rL-3QjnbN;u2 zkw=YNntAtHc7LZsUH3CzHQkoI(x~y929DeCOY?jCZ=VePedV-L$sIQzKY722d%Jb= z*#p{-9oCqcBBtZ)!c2yp3hc@t5Tr9o9@+UBC7j zMzoQo3X2eWW@o1VKSv=BlH~x%B0Dk&`Y4e0@hI)3K-&MKH0|*bq5d~a5u{kqlbIH}+0r6kz!RvWjw1j6 zt1yuU*FL<`^2a=nIV3r$1Eu9W)FYWP)FYT#J(9I_Rx1yobc{o4q7KxKbc*W9cu+4Z ze3b8rdc>e7>JiM0E_Ey66@I&f;A0Lmt4BH>tJI7AOg)+tnzK$aKK!Aa3^T(=y2f7d za`rgudNk)WNd_q4oW(IVFseDmVPUn@LwGRfQF{A@DG)5kPGzQNANycw;r&p*K zmR?~LV#F8e6|Uhun!~uJ7>9RA7+^ERNr)!9@&PEX-(*M7fJF6gJ)l@t98{kJP8`oW;n z5s~=!2v4tIR`oAg!7unXpVL9bImwS}E3JIA8U!Hnqn95n z7d0-rG7jH3&T=}Uf9o9X+RQK#3*`q(khYqxo5OVcwc{`vz zqW%OaOsa!Vm^2|X^CPPLW0)1c5x}U9nem&N3BPaTH>n(8-aJ_jymypCenai4<*MZb z-Y7a{xodI7?*a5;YuHWZDh1=?D&0h?7jj+MUDWsmHLqHpLLk``44x<<+nA0wn>6x* z@TnejGV8#_x$t z`2EB3-a@i+hCcY=&`Ti8iR(9(n;7XMmZO&6A@AbsTegzjV(wBNeB7m5NPUZ34f`E6 zj-cjEjJz{i1_Fa0FwrlveFWw|EE-zSWq$aK<0%Hs%NR4`*Bt+;eCcVjGfqC;k@~}? 
zaBarF+p|gF^gSNG>tMOG?5w3Co(602F3NkPoLEKV&~M}GaQz-NC!xh#3?XT(0l(^T zM|uqY3ActE6$E?x@MrvzuMpAUQ+{8`Ub&1?{=%P}lImE4KXc@Ys^mCI8Q-S)<&*ZW zGPM7v@}=2yEfap7aT@%U6ldk{1>`fvLbBq)gWa%1Gy3OB9$p}IL&f7;%NVq*TfI){6__dp<60ffjDPSeeb#}&3MYTWI1#L%eAEgG zp?x-xIi+X@O?6C0S{>z6AvE6*1j5hpAjqb$`toIY%{G~>-?TI{;jd`ZWSu11azSUv zKox^=t&aax27lInye`fA4|--*Ovbf3(r*dT=dX@|=s#nJwe>Ma4d}5E8oP?M(`sSI zZfOciA2AOEebxp2S4A~#y8h?%S8B^JAIuh_@59Ei>RKPbTbif%_JJ;A>=EVrU_g;3 zNSkOhHqXr94PhrLQ9X5<#!sClcS4=^KstFWZ+{exgYD8LyR_LZ%}~KSfVVG7#8Dw( z1W!*vTGkz8+luaqkM41nzV;g_g-<5#Eppw3`?2H{tzvcm)O!`tCz7n6(G+3aX?FgutW zP0o$*D_3!B8fwy+Ruf*aipATd5msm=mMJ|6zj2j0y&RxO^gvCf2Wm3CW;oPG^hg>+ zFBX)Do`9dHr}+#$KTwqEIfJ63+&c%8Gxe^KOfMLyiC#TWBzhcvo}Tuz^u7Q^E=^FB zduMXs(rlzilgV^!(o9g3%XIt_<+1C5H-!7)_r+-ZX!HjBYvY#{y?<-`vZnX%jNj+z z{ddO?lN2T)nw4c`umYE4L{G|+-hY4mh@R#%^#0r9NA$FxrT1@) zUz9v{k*qLAF;X5s8olU$ZTzyL_iv3~*7W|J@%tRT|7`p)*;s5>z8>R9Fw-EOP^D=d zHg&U=(^|Y%m8KQwT7s=S80mW7+0rJYQ&nj*(h=Ql}6)TEC6LgSQ-B!<|H{ChP0Ut$v=dZ_W!8l zdB}Cj`s2A-NF4~V4)8=H4}TeEEW>oUb!dM?!FN>0Dlxr-aR_q=y$U)$xs!}o>Sf&b zPzE2@2*10MI{yT5Ww# zSe0kdo+0=hToWLE{@W9{oIO19I)`+VSVhs7%4j=;@xJ6j-CUB+m_YRPrHvA3>R3{s zpc{pJ4e|;rbi=8h!e1P7F-9<_koln97C#36HDo2^&E$H~9unjH)`P65?j4C$_E7Rj zeqA)IQa$9qM*eY*$K=R714$ORGYiyc^hR@1l5rYj3t9X7DDW)c@0Jny>oHwOV_e5; zSQV+tN2OKvPx?HVv#2K6~s6Y9He2DJr%wdg7-5L@x^(chIAt8)mPHP zkk*68AHkcNhQW@;NA!(wQ~$04zn9XUl3KIS<6gfzTj5~ zpd&iyyDs#5;3==-^)1TOa=(W((eVPVi@-(KC%~H<{$POS)(zmL`FaiYLX^a-TZ_8=DcE2Erf4gx-+ z*#&vQO8sQiCt7X}oEEOdd*KSIE~@&v3BuIj4+?IV=-v1rfOaAUC9Hb##yp7kI* z;KUfYcmh_TIV9%gN|e*QbYjv ze=z8nK}W~T{&*5m47=6-G}r9sBc*9B0+;^`r93B~152{2Pd^Ui;Lr3rjGZp@sw1ny zbo{!7BV=p#2q}rD>_MPQ<*=6_g^{1ibl${+JwHQO1^>(7wG7}HK+F|?(H4Av(q=8{ z5RKKKQDl-w$YS9B2>2tQ$0xk%{K?kX`lj5{EMOd9X+V-yP(6>3?dXe+RR|;= z)H1NgTM+f}f0ACr$9wRhW8N6^3V7t?jXsmyH4yVXrVG;rkbeAub(YGJhD@^MzXKf9M&QIgamC6eH=~QhVk$E?;L483B6foY zMa(xVPA&Wh31hBjv>6nV7%Q$N)8b1-L&pVEJcRnaqN?}MNLn;x=HmB!Nnj4{z+ zks9Cv{QxiK7~sfqcbYYJ0kB0jvKqh&t z^5DD+`J9vc>J6VK8Gaqf+)Of(@$Lp*tWPo2o@}aGZX{=Q+Jtf;NBnmc=AzE%hPEU} z7p*M~-vaeWX0HC~_Ilvc1Ba_3<8<^P3~6d#6YcSKw|5=j^8ys(&=YNFtnuQWv74QK za^3Ql$DDLihyKBaAtqbO0L7(i_6zfwuTpTEvVv z{s4~aPi#2equmW9ZI45bwtC?dag#rG=t$Qyg#USabD0%0#%Uvs$-3f3y+aA(8C^;A@KY)CZ0c3!N6eIrV!IFh{XVDo`z zh|$E{jfo1+_LfP^xXi4^BUrc0tOmR`tJR=uVKwNQtcFh(Rzt6{8mJ>%4XkoTAK79# z1cxI}LOMXP3b+oz9YGa!zx7JL=`Mt<1IY|CmNX3(5Hbu-GQI?Tk_jv|4A>TzBY6kHV;$7Nw_ z(RQuBy%nbUX~ujo$g5g5e@B)rbR6*UIc&44Bm=U5hfyXhaU3ij?#!*0m}H~2#G=hd zv=Nq<+UP)2Sl+DKM8I1YmX_K?p^dP#)TaAqEUhcs42!!3aVz3B#2tt`5qJ6Hb#Ijw z{-3wMA6apuhqi<<7It^3t~_A$e`J53K^DTxyJzR+T~@rjdZth9b1);1aX0McYQPDA z6Xdx^*TQxaZnL!5XqHA`gI08(qF(TF^&wsN#O~*yb5i{gCFh%#wxsr!-EtygS?ZD7=K}-xlN`! 
zFqxSI?{x(eFK z^wBOxViRRv<53rPwb1{Gkdw|xX{~H4$+OES2a;7*G7@Fv$=SC9PVlV@!QDIVtQX1U z!oKqQpN)jO1g^8A(vdl>yYB^YtPo6w% zr))oKXJS7+AXg8_lpxJ957=B`ALpQcamAbUfD9Ibmaws5xK`U#J=*Kho}fLRTg1Bx z&m*+0siX-H*-RGgDz7aWZRGY0Hqw>tMtYB(tt7|wf8Jh_zd8W^>Ld88!SGif{fGW) zBdjBt`>V98%PnRXAz77ATtEJU`IBv;aizlk9|^W~HNKcP|4s*=!wbgRt2QJENq# zZ&5}z`y+6Y&89lSW)m%HPqvcmb_B|(O+lo|Zc|#=ZPIh9TODO)?IBf{4w*^_OT#{x1}+w@eji<#JTn&h51b`Ke1>>eOEx}Zm}bzJBxLza(aNyV zSS_YIV`nDwbgYt&UZJE%a+qw9~RtK?i^l-vm3Z%$VM}nfJ$_&|?a`H=4Jnc0_0A zPn`c@N`9gOw}Kw^f95z2-=d$CHY;*isKA#Btd4rbS2MJydchAhCII%*f%6fi;C(xc z*L#UHmA}>Va@}yAKL_%s{BfX1@Wc+D-xG8wE&98EJI@zsAJikd*n}_vu;Shv9#;6+ zd7aZavxVoc{7osp$6?R=95&j?@dh)VF1TDHkHgwEJiW67=gSxT61_QVlyRNJ`K!F1 z)7h%%O`K95yo=`(ztk>Pk!SIh9RG>C9LBEVbi!6}J|YykM5F%_+ErKJ2}M4)6gvCe zdAnrr4fYeTm%e?`%H0fIe${hfA?%{gZEQ#aOujP;_c=QS$4+RF#;N?D#H70tmE_H*( z1i+#pADSnrNZJQmhO|}^nPXDMJVGI21xj#l>EvH{~I9(YFBxt z(jUwdlJ7M@8qc+fy!;z5-Peb)3yAIUlJ8GB7<&RQrezR#(W4A6@FoQ_Ry73WzR(vx z#!6z}zadtQX6%HM1Y90C?fvwL6DxrK@6_Rq11*cVMZf&k0%fSKgl25Nb3W#UQXl5P zfR1iZSaGIjp-f`A5#{9?X;++3rFUWQb}gQII56Vd8{ZPzW|MzW@Q#~C{>j3|C~3t^ z_^(hii%@X;;9rWpm>X(40*?=%3(geLn}gVALmr(z=T~l@TxAFLX=s7{d+?tPSe)=i z1AXwLo%mz}bO59@z1>2P-mk>7Z*Y4Ij+dpvN%?i%Lw&J zMkEp;tBjn*fJ!m6AZ?Ho{VIuHosGd8GiBH)lVlWRfj*~yDi&nw-X4dZ(7_aIYvfLEvf|?0@m&QK}N6X`Bud}?D z)FYcV5suDQ9QOUQNi7;U$+8X8s4d$_nxt~jvVu}UiTaUQe=_!kY>NaN7lYHQ<)LF5 zoLh|n{*0PI+}q}f7$cIin=DfyWrC#Ng4aoCC87UXHc7$+67jP^Xnzk!Ra1_f<3QVb{L-+ z{1Y_w%bRnenyXYo`o!lu*CS6m&4@#9i9&n8@9_2*ym~1f9CVQ7*AaMopp3Q`8$;(} z@Nd&HH!6vZL2e7wrnQ@|Xe0^SYBor#9&4ShEzE2P!Uz$cW*m+4P)aDxA5n-l z97AE`B5G&_as&6aHHT&)@vBiF|6K^7rPNm!=mt(Zf-cPvs>vb$We8fAI5x8SahSuR zS1}mJV6>tm8#FtKwglRqN}3?irq-Metz4Ay1O-lm-bEwrDh7*b!|MpV+CQm9wTWu5 z+@5rbe2Pd}Kgq*#{ls&kHQomt;zR{Jra$wQXl!V;p;agbgoM9Fx>$-_RymN)1v_y2 zLmEvZn@!d?oP$*Y&n1hLxlct}+}>K7FgA7&TFl$JDQ(FwBg;pS{4q|+*7naR*{>Gl zmGHIf26*5)D?CAw5+0ryA*yL_H8c7buu32olCX#LT@9 zlJ^y>bgIIp7?{lh)UaDoNS39V9=4J6 zi+Clij6_m%>MlEwciCU5lk-pjZ<`^m8z6a#FQ(gP9`kaqLUN9do!l(sbl)J#${~{u>;n;xt=9 zDyeCC7Lcbf6|Fl&B$dHdeALehE^4W$%QUDd`iYayvaO`iCY2SIPgu|fR?k1H z{i8@k6?%n9kO8dFtS&zxP6EeDe5y$ljnmuZ-@_1`07U^D5Sg!mRR zqx&3^$}p7J_z=0nt@TI>v@^tam9pv!?Y~XJc>92};lP?5(z4%;BmbD{Vbv!UgmdD6lL zg`-#0$_uT$tgXDDSF~$IyB#suQ=l=fh+n4Tp)TxJ#$DR_$l*H>l8lXP3Ac};XMFUi zXGD$mhj=d|;uVP+<0d4;udN7~Uon_4?|smidzW)o{k46CfM zX(I7VPaU|X+mDJEEq)IX!>VXRKjI)Ic)&8Ij5;Fb4B!8(CfDG*=rl&uXI@fWlhsJ8 zcadqt%VGe8?G(GpLZ8LnvhZPI@T|!Gj)6aI<2z6GT1R(JG1#l7e$m}@F5D}{;NFCf zg-Dy^v^Ox@_J7Gsbd#tLgqa|XIzgJt>bnH$LsjJ2?=8!1Rc-#B7fu{zT}OL23LQ!2TWAfS*c{@b8iBN zJP4ngGPsQ=AA>9atzLYDGutzc0ne4>XM$u0uDS7Eali7W6}!z3mD~kInk8@D)U!lg z2Kl%Pn?inQPrFq#Bai=l-DuUj5(LJ0@>W>?+XyFnMC)UYp+& z5r<@p5@Gi!^hKKcG<}hZ!I)JBq}^cdJMx|2jJv?(+u7_4rzfZozC_F`!cI1j>@Yp8 zCJB+>NK^z51Zl?6{;#kOVjs8>>NQ3rjeO}Y^j>Hr>HB-DP6#a|@7}iC>xdsoKs7n2 z@A-o4at-oY6yzbStGFY{YM+t3j;>Z5|7>5{2|fOMJGj*25mu_7w)X)>;Z>2%Q{OXI zMVe$yR*m-8i8fya_$oqUNo6#Kq#I&vGmd3h)>R%o@~jC1jj8$`O01Y_Oc^mmL+j`X zIjwtc^1}uYLrTxCd#Go$x=e$bVr0nU7oI-#f%^lSMCOA zb>KO8{~mUVp34{oy@`27pya_ho(EJ;_m_n(zN zuI^h_elf@ODIbZLwGN0xh3f=q+vmObMb=)EHLLEewr1)w4O`@Gvu1zCi|3X(yT?D^9f z5t=jPcZ<36pZoDy^^@+PxYspCo;Qi0DRTG;0yE2O-(B|q@iOt}{eR-0&W2E}7{nrS zk0?j@Me#EnOR=PEf(^}X{Y=exZ>9exqYT{22_(ywm4=u|3Ym#!^e!l`orr2nfwB0VL*Bo_5d#>*iLCa zqAfT%h_TMtOFHR6Ek*zTyS)qsW;*3T_bO4@#rzdda>~$Vn;gWG9B`uf?@A^&#)Zxg z(6}AOs20U+Bqqa=k(cAInLjOW25(G^(-GP7a<86d(P%aY zPn+ppjwtfdZ1U=bUx32c(dsrDIz=^jou3sSfO36QxnzmO={n-*vX$e}(2inCP2&7@ z2F{;ZInTP@Xi%yu)93dOTnB%_UI6}-9b+YCbqqTyKda>B$3B@0>U)j+Im8@O-)op~ z>X-+z?`3k}x1Z!dXM2hFXluFpUWE5Ok1T;O`8!b%g~;8sl@4jlb6&2}VOu%rFg-iv 
z-U2;;jI+w-h-_bpJKH_*=k=PevHwi3tLGHGu-DhNIb|{6i+aDj`Z_By@TTCUbgO!?(7{6~4Yx_5{-Ql*e6Kp4G z=}KQ(eVt)jOJinhkjK~>E1xRsihMC%o#m6lok6FYl2;VJif`|CsBP^v-^}##j+KsD z?}+oP?A#sMzEKZ2FJw0d#uFF6Ry8e?4*X;e%4eO_BC zSlef=EyM+l;t-hz@lqI9h#K-(_^6UcV;fmBYSYN_5sSwaAKCaU?VSCXb1TN@Dedf$ zMI>zGGi0s<#6b)^HJYC`Jy2|oT*k-tua4_czs$z;|N$wjCMAmTZP? z)YNwv)K186AjVtIa`}ILJoEaayK?w+Jo)U5lh+!t4`CZyn@?85KNst@A4v4g*n3&a zYFV*1W6lyE>w=Q~%m|hBmp_dvBxOF2=?Oi}UU7Cgh831;3J;6>8p5C8JHED2Eh3R; z#3Y-Cfm3)Ao#LXiwRo@L6Q0zwbqju6lz68%n)q29nnA~`eIxlEvdUL@Mn|mW_M`45 zdo6CV*WxC7E&R#b4PdqRj5vp)C!hL9#Uwt@sdsAsC{mVAR(8WmNzoJh${=K`z7LFH z#U=8OO5n^Eei^{{dF`o+n*<&{)gF6)j5MdDEKXPhl|>*+)d{~bmr;YdSTUrXu$X;+=Bi?CWO_q*a*S5!I!$O{{z(Uu& z#V*0 zoK|O~OY*yJ#Vb6ba{=}q(FH4RwMTT{nwyzN#Ai)2^ojaKdlUtrIn-C$&yHa)CGnJ# z$!~5#40sBd+50RR-(=LV{J6Jv0)}+h5PGUy3D#S~iqPi09LU}kv!>|jq?_tw_}_mg zg?2h+Ta9TTbX?2=@iqt7`Om$hg8djxX3rWm@EB+ok^c~YcVNVOC*nPoNs@R^WfJ=9 zfe)4NKm#D^a}1S{;|O1d_~U!*{JoSMNb!?p_MF?E$Ixj3dOE`Q#QE8DvJ`|9=a}$x zhJ01qlSclgB!AyT%zQenV6#;=>!L^B&ujV9Ai|RI1U%OX8z=M*p)2svGb}nkJ_Muf zg;NZ+6YP1x2d%Nh4e>-gb1YOsZxK>CKQ~4>bYg|_IX1zeKhk_9575x*9XfTAOa6Aj zcYWkgscp>pn>$?VGPjAOj{I7RFpp*dDqj3Ymvpk>v8cNo#UW_yPthLzHY=fNyUSa0=-;A?0=Jm~OdI1jBM zYkDS3Z);Oue61zo*)G)&Q04M>rRi)MJ)NO8C9S2@&tTM7_qV*N6u*Kfmx}(8uJL!Q z>59hg8p;Xs9+%N5{fuSTxYu1-(-bw-r>nVjBkPs%J@jg@JyEPCYI7?TgDZPaKz;5) zgV7=y^p+*P-9zss(OwKe>a&~moh#aBUT>AJ@bv6YUGK2=Tdah(_X%z9%8*5j0d`@T zHS)!24uHAH*ppe!|EJ?fH4Gm(ol5oDS6$k|fMz+xdCkAsA!Z{-y0) zC)Z~U&a?SrJe^sNV4*uS_P)XRk^||9uoJ`;pZDBe&~syY)*SIX&^Fv4}ywgxCyw_eJFKX^&P($a8>x44{@M*5O}==#3=0>W?cv_i2== zExi{e;f|8#G3^D#;Cmo?z{_Y~g6=1@avmxv*yvfN8XNcONe0xG*CYHq&2b5D&sKxZ z^)(0f;!3Y!8IZz1ctTd{-(?^lBu9ww;WGDafObbP!ZAIgA?sz^xh3Tj zqh{+non9l~z8vd}|37ub72&Y$f|6$N5usn8pi$DwBOykm2rM~&A4Eu2!hXPC9iPF< z=zKa(Y7WUK{7*)RBw@q%xe=lf7P~U!m64{D!aUTmCn4hbj_?j|hj2-e=fU5RxoaIA zk_pXM$54F9LG2lmm!=D~Z*8kft+UdQyrG}hkO(Wqf2tweGq%YqD-97n`(OCK68K*U z{I3N5R|5aHBoL1OMx_t^|F?l1x2$E|(?&TL)a+(C@ZZpjEmzY^rhiONw(L*8kiI#+ zfu*u#m}P)vG5%M#81A&`zIBcKT$=XNuHnCTPg<_wKd<*$Qt>amr!Bu(Zop6C@7NP1 zqVi8E>Hh{L5BALBgMTsiWp^wAxDK@>dmR4IBs~~s1Nx;mvlO!wOuv_QC%w95MEcL^ zJ1u$GM%;zb>DXtM^6;-hrIP4Rc6cq1@sIDnTGm*WS+-g>;=j+gS^~GAw zqzQQARnHz;UI6nZ%RN9h
~7O7r~+TX{+3i4dr_Sk^?Yj@)%}`N_4$pp+FS z3!T|@dYi<8a~akdy^}W>t0WIFyXl$9-MBh(KyqC}*^_G`X9O%Py5&;;OFJ&@yySm+ z?a_**>CRI5Z%h2M2^QEd`B>+lXVRRPP@AqPOpiaVDJH!^2tGG0?(Mt`Ok!( zIe2pa$;GF~M^rQSKRqL0mo}K~lln-AtZs5c^F&}=6WQKORDt>AQ+I>bgRP0|Z~Q9x zYqR5NKVzF@M}0?D+MpYs12AgBug1>F-#HxE^RyJs6WUGjoEcN)%bJrbu&o;oJa?O(@j5^y)!N> z>INF`z^9Y`5<4+oF2y+Py|nA}W}|b;CS%SNw9*_lK1hBB-kz{o;O)qzLzfO3UnHlS zn}hxvHgD{qOTQRDAYH)_WoSC4u2G+I*XWRP4^f|#9$CSF?F*T@lip2gKha6=DIFf; z4;ni$(*CEJdB-J3Z2VO>{^x%a=|X3G0=}M~H>sL2anbQ1Cf#8C+_1ADMi<_< z(wWLS3;uS-+fY@fO{{jNI?8J3BtuP|Z(^-8wNdt^u85(IE7G4N_AQx+!G#nnVZ@})JA zJ&h|;3Q4do#?>jClcJkH_O{B8U{?VWviRK?lw=WI6FLpYl(5Mz>E$O1u$B)CfegXA#_M2kM4vE5+6 zRRbcTB^neZh{_TY43G_}(n>AWO-LZ-i>Ro4n<7O)%El1kh7(TXi-toTU%$iBZh zn=e4y=b!hw-uHSAy?#4$?wPq~=AO?pb7J+r^E)M5+ec`3@Dr;N5lP?t>YYUUiRu*W z{(kjeSI4V=RiZaeO)B*Lt=dv@Nz$iSW-Gnr)9O>J(~{0sU#~x%t^REFB}v~@$E)8c z2^-UrOf~VU$#mt$cs0`G+c?z}A(q(()r?7+eA&39D=tgamf7QL-hYSx2dJpP|{d;ScmV{e>S? z|J1=N%Dsv7c}H?$3-LtEE2%p&+E}&T=?Q7ag72zpU)RanYaBsAKRkK?u z2p;GVX1_cZH1AdX;&6vW{mK-ftth+Eu5I0|1h*a_2a6uV(icOE2*(&s41e$dH^qt` zL+Oh#yeGzZVzd#12~v+?>WeX6k3osA8fvd*?g}YG?KSRuv|;wGJjr_0x?3`4yJ88E zbqU}L*D4*~!=epYq3d2vx^Hxl!W?TyOK9HKMD6A^ayGj)!W_8QUcB|f`ctM>S*9$# zOj&(pD%Q(1g6fst}+!ARyi@Gl2;&rTju2?;7tl6SI5iuHjDVZjf zJ94#6%=iBMg;B75&Gp(IT4VdxAE56BZ4cvn#hOC{|NW*60inJXWHvKtMf6nCj2X?JCT&Cz+q5e$7 zI;!0^M~&O*J@dlZH6HKT7p9!hN}XP@)8qZ-1yMT1%qyFlSN29yo0^N6``|h2~dUP*C_p$-HYkG7K zXqldJmzwX^TV}AXdoa2yFVMXi-J5sT^yscZcg+CZN550|^X5ujx2EgX(7o>h-LIkh zwVg+MbRR|c(LUX@Is2${N5P5O&eBB2K&Sc3id*boUTCw$;r8eFqVFZ+*G&)Z7g-`DfL$G+sk z^7f=z5SgT>iP6)4axw0L!C24`*ZF4+dEemx@P-U)tO_ewI|C8 zUCq9=b!K*J@=3Z8z72IhK_2jJta}`{(6_11;X70JF5$n>oJ9qT4r(*JPrq2OU~gGd z9dpvE^~?e_?ol4;UyHwRdQINmvfd{dA|*9)7S!XWduDrIebJ=VeT zw<+`Wy1$lLGXuH7NwS1**L|Sp{_DCw>ZR$ZOV_INI_mCPYt7u7dbs0*GWq9u)s@7Y`OfVbA|e#xv!X{~qUOJy!oW}z!(TiV)YU+OkW?;B6~()HijzFPOJ zm+o3y>z&jSFLRrRc>f{evE9CA+x`gNRn+xShu7!Z8@4g-^!@k8 z)@L0p8MKt)$Gp`4bof$Ft^LUUt8IQ|mGCNxHcv?? 
z_O+i{+vG0P0(l+CL+8lTeeWYnK2LTES@rGizSRi&rs?ybbE~2 zrWDXiPy2?x>}O};Tom#0(8nUT4}Hwbt~683V@I$fat(DeHQhYO+Bus9yd@A?_(d?Cu|Rw_j2knw%DhfnCyKlT$5}W zqIi|t2yqhfM2h#Zbl-JIWx10F#83Bmk*-48ujMBDdg?RJ(pR6*z0^YPErjPLe2ca( zeaxc%knn8nrFAv#d$-T`7CkmUx7r;Wm+lJWeKw9>EY3~i-@f*-*{)JzmgzBVddxH9 zvbB#k$U4jRll2bQ_nGav)b~c}`+e&B1M2%0^=;^>@1StIDc22stEav%_0)G5A+};) z$6M6)7d^Ed7(3nfR!?pFr8n8F^=F1@(>}8YZe$gY)25~6)=-mKxwoWR)IWZw6+YU2 z1}l8rXN7-Ix6l5K6(Z^fmCtrr>gRjaa%rz#EA)vq99Z-gdz3qoP z4k+hWEui*j6`OUk~pOy8+V z*5v$B^=s;{=fAJt#eMlF_S6pBe4Fm?sQGtWj=Trz>8Z=NmGb=7xX^Oas{fPvp0BCD zHG11q>i!l@-%+~(_Pd}hu$-O>yY+;TN1twK#c;d!=G<#vY4P3?^p;nq496a*H5^OM z%!$m*84VNHM^zn66zXze^b(UKH^L>+x>si+B5gc${5JCmE$xf9JUXRMALp@s`ZPUWSzo*jeen+R zej2HMtkilRZ+LNiqz#qWVAt`vz0O_c?Pz#^{fRwU zxBhW`*1Z0?^4IHMYqwN2bhOYu&FxJc4ek3oWLq7eojJd#vd9w+*7d_3itaPvGwD9Q zSm}t+eR25Ay6+-`G!xYLsVaX-QW#%5$`z={Bjjmt2fBnFQ-g0p=RTOkQ)L62i+VKd#_x~ndT|IbH<+=?Ud(Uv0tM7lO z_x$UA`_qjVp6))?`1FR})7>BW6SRx95^b)xuJM_Q#tu$kd<%Gr-xs5Py~<=4a?cT( zj^xG0&JF1qyBZH}kf(zK&(01u9@rqCP0adc`!5@5^8~^E6t_*fJ2I>pziSLSe%IJR zxu0pu)U5IZ$Rx(?$T&UWZyOk8mWwzW3WaLi(|sT9{dhyV7Rw$_hI1BGcCRLWLwvIc zpKZT0FmKWH^?vM zU$d`S?}JU8Gr1XY4()c{9Icr}<7T@e)AqQUIZKj#REAF*5WbY~sIWrq2EM3zyZGo1p{oOAudTUcbh19%Mra`YoeV`}R(tS~+y5oXW*36~j zWXhFI&r}pKqLwnE#w69a$0cob$E$0#pH|8|*YB&Uh*tyJrpj#eMOB@iu&L<^?eGR` z#+Igd^{3k3Dl=*G!<>aaqkUP~$gN3l>&Y{~x+dAal~#G9z&l(|KWNDi<7fxkn3iyg zbK2lw)BcKq8eVJaKk3x_fNVYPB;@IK_{3G#;g3!4Q_4(_fO<*qD@3n>P!(C?#KE` zcf5LquI);+ot}{0yu^NE^R&PZ`3Hoy{u(;d5r{$8D4hS*qu`%sQj8 z|1>(*8bz4A6H3;isxr06%)*S)K(jZn-+8N?Q)guD$hdOe>Oj$=s{M5VYsM4JTLbx; zHS`<8zA7*^EcsRQfAvTPt_R1PLjm^A`X2XB`z=)g*SpP@sR+s{2eS0ihj-5bS9f!-uI}dEG-pXOFxGLYt!F15bwKJH)DqTbjX4k)xWWWnm$vlk zyR@Z0)ksnej28-Qv_|Q(mOL$YwWL+XS4oMS-4gASTH>o(91o@zr|xhpPhFAP>R6Wg zoZ|sjlW&vmm*>8rAM0z9l{HDezlEPP{Yiv2G0vrQlVdBV$>Dw^d(nd}vt4qvZcE0K7L30jwAamvio(}GF|o_vOgnxK$ktp@97C2^1NJsE>4TD3OfGT zGA603W%G%DR;K&TwER?e&$c{;`?C}ARn?9`JH{o69a~Ne+L-P$@A$dysym*>eTMMI z9Wgr|J#jH%LwA(wZv2k*xK)IWNpkMko}h6m8MtxeWm7he87faHF6)z}AWQd6=zAWm zKTqj<9(7rK)wrbTJ6`Y6ap`3@ZycxVnAIn{3>jwteb1@-^UZzFW6{yO%Lwup3vCcz zwZA?5K3BLyp7q?jQJ(e4d&^kfU#E8TztOGSQM@ty9=F~8=#KDRwY;U4H%jA}87x)e zf6>4F5qBlu?znHzbM@o62cERn2LZ)j})(4NZ|08#1>M5+Hi`<&ohf4c9!ac2LAc3e%Q0Y@Bt*nQ>9PZ^*NYVXPj@gYtditFkEWmU8V62PdrlW}RFm zdlh!VGHFB5Jdq*Zz3pdvbIFI;JESEeO80Nj{pp^q?oxM|d$!A3ZCgEN`Y2!Y>ayHIZ`^av)wSNOc?r)Y zt(GSf&Rc8TW1jO*(*5&l+}0Vxt7p4po@vj?IrebJ<*R4A!Y8sa&g8hK+}gBBj=k={ zjr-g_`@-sQeM6xEr=%`UNf&zC^V)k)O7oSVnA<*25ydwvzgB(i_qWS)u+}e@DI#%i8ECrt;)h#O5oaYOeCrK?7?vD?^)Bm?5olVcpvMN-ys2I_|b#&keGz=73@qNoo;id68y$ z|Nq}5#p8A>3GW@?KeL>qE=3-Nd<^mx-WLlG?E>q&wsiuq8 z$kb9xuZ9(RoSAx@2Z%G9I7iy$emt+Hn3MGt_NC*j8NnJUS^m5%;bQwUHE(u=OBg$# zgm?Fp&_W4g29)r@z7oF4=|k+dl<+Y<_Xo+nLN8$qaV+PR@TYp5<-~cKIB&L}ERiK@ z(@S(0R(`9eM1u#E=!KpVji5x)14{HtUx^MVgZ1*f)p4%Pj_SD;liP3g5=9eh#Caup zPmi;LI3E({t#*0a6|bH)ja(y3_E}B%Ehd?Fng0y*oZqP3COK+WlqF@|Jh9ou`G7hs zV)oR%sWL_E3x!i_+=n~DG31;l=eoU#@l|#1(-Z7lnKeVh*T~&(vun>J_SjX^Ni}Bc z18W|4T)y@6gtVI(CC+Ipq=x;5#(Daa-xTV>8fwpy3XK;}VKW}c4hF=VGF%;b3k zz9cnZzByxRzBS`!V*OyN9DjLhfwkJYmTjXm2Vfq@QA2 zHx+5QP43a!9p1@1E4(%C>vo#7>*Rf~uHmZM2)?%RltwUvR(K1!*@&RLh2EJvuPZgt z(xz#q8M9rSVX=pgXODgcjkZ3Gx9J-1N8|l{8t>FK-gSY-r91B*pi$~e(D(e`TO?P& zb<{?#dD6c&vBq8NZo=QGxUg7d?T<>+rl{r}rblWYTcz}6_fxgclwO@%=f1-C zvU_uBOpKIntF5ID^`*7cbQJ4l)6VBhM|pSbe7SUM-pf08mO9?qRqEg!h~MAD@LudD zO_Td$O}asH>=G03Rl7g7N>{$qbc)+3n=D!Qwk~{2n=Rzc-8;2^EKT9gbcy%9ojLGf zX@vLG&eia@(&6NCx^(zEKu(VY0DC~MI9RXQQ0W(MK5NQrDr(mtQn)~wt7eLWX(vf%P6};_j|l!UYtyLdL84x zZlO1mr^#ifC%Eb|%hu>mBPiFDvJvmVZ(<#`IKR8Ub@z_!ZIwh;x$7cFuXkwqjvMN- zv=8~BU@jK9qweNog7-Rxlqs3-huW@)sl2 
z?uZ%Qq8Fux>vate>e@cG-i)?#zGLXuRZ{m@pRN*g-Rwxu-|L>|xNS&aQo3)x{#&5` zF3^8(*MAr4zjx@ri}c^mhROJ6^xqM2(x0mT-m8axuFL;h|9wrD4A(=)>c9Wc<#zq| zQ~h_X9%r5Y>(C`5^xsW-{2%MTPw2lt(SOr@@_&2W$=bu^v|jRpp5DWl#JAi1dEGzD ztoj(+>P}}zw!h;bXU>d?4%dUeTse_t&e7ln$4|rvi3eJ(B8zzwEi?}|3#X)dYV^BV^MFx z|I)fT{__q_F*;YS4Oh^E{qDk%Q7M7E(w9E77v?HRtMi(85_}EX?6RoEQD*JKb$V=i zK~!4reSt+Sje2HX6kk(?zpLt%n@Gp^UwU2^^+{5{v95og{#y9kuJh_AXBUgQ<6G({ z_x5Z5MVdDT)bCdp)Gt%7-#vQ$&b)MHK>f~8zkBrho$RY$=6Ury(^o(FomhYU?xB7W z+!epH&Z1f@vVNhm{?_>R7PRR#bdu_S-h$yYSL$iPEqIllYi|ni6Bw^%q$ z&4BuBJEuN_sn1<{eGZ>jpTpGWF1%K6lC0zo$OO_4=r5`&;Om ze=%M@{gxWZaT)b5(hOE*tA<-B7XQF8#+Qc!#@OUt1IO6pUA<%MGBrgXW5hnM4^CFY zV@!U*IIs`OF?O|{7dL>U9yrE$7nWbOe(M;Uub1`QIYhQt)W1lxazOo7T~NP)V{F^5 zfn#hN^$U-&r~2x5{utZVSHBC#*o!^&+pX7+RT%5^&RT&R)2GL#Nm-^YlckU{TiTF+ z+tNeyzWdj2vBY<`bU2L|)Zfx({QWH*P2XLt_uWf&pF0*W*)7z?dfy#BNVanHIeqt% zL9%_L2lU;=wCn_3w?&;yihtj-!}R)Gub1tA+Opv^iwD$a$vO4WsL$PceJanZPbKxa zTdz-yxy0FZaY_GRxQxj+w4x1v2XL8g@6Yd+u3l z=8Nl`x4yXBKM7YqA=&XPd+`fa=34i`XJ1>_yD|q{^0eev&jwt-fA;9Qfa~b9zh4)b zc}Fhan$7qV(s$&O_nqDEt}|s-PkNtpA;GWq{yKT)BGdkzSf4-pF=;=3_Vd29Um^Xc zp7x)+zgoA?{q?$iE=$$*S{h^2)G(6!`0=_wu0j9SME^RV_gjX`wKVot&h-PoLLrPe zgmC+^pYLgUPvEi}ZcDrsv!%@=4`ln|V>IH7vdoe4FQ>VNa>FwOU5!|4gN4JrA2 zr+CYQ4YPUIl44Ati3kt)60~)XlkDrvx(IZ{V8)b9HV_ z>Q-(jl%ksU!i-bMJfbG|?bQG3P>SAe4`h6<$J#1mA*pGX`9@V0X57>mK07px9r%6X zIp<@FXqE4P_qR`R%=TT>XmgwycJYQ-2YcKbgY-p5**&Jlq17r_u z=wJK#O8@OAeAJ--jPDj@&mQ?E`_N!H>v39m&%KG(8N1zu8D)*~v~e!q9Byxp= zG4>Fnu5nH~;jPjVPwAG}61GHM&>h?>v|bxDYzW7@MY-mhp455I5&ry!pnL93FYQ&d zmp4?qr^GGs9cnbuQlq@D?6qoEdSCiQf9D(E(2eY5x7y|NVwGyd8r&v)Xh}n!own0XsTCjDqnpBCX7;vIxRv6iG@`%tQY+GZPGkdHRLW9iyaBD0 zZ~u>`-Z6guw~P9hsQP*8F83UzR9e2Z%RcLZQawPa=A2im2P(qlT5w*u9-s_M&MTLc zJ#ay}N}A*;YpkQ`##>VROS0P?K6#Id>#>zHY^x_PC{-J!sy?q&Z9S!GKCe`5l;Gv_ zN+o4&=aq^%Bf`tK%K7$RQ@U8kuDrmF)@kALh0mJz46xL@7nCVF@R9xR=anfs5Vq78 z=anfsknU5O&uu{|i$*rE1^dgC)-21kJFn@yGVOI|-jqH){RjPT@elD__=Elvmhf5A zTF#pK&zI)5ciFR=>*ZIplzN(8k{f%iqwdSM|E1ZY=3B#K_lCq95|@s% zsJB{!mDMp9Yw~_P-E~Lv6M;xISA1bl&u{iVu|I3xS^I0L@)S(|4dv?{GF6Scgpe6f zHnsQWeRcDKz(`+#ei~HHcpLZ~KWjSQzt`o}vj|+@9ha1>OR!2P%8>eQ|$V;HLahT3~8T-T}1D_Eo#z4k+5&ff?Rc_rDu3 zsn-jn-Xk(BUk~G6zGUh&@2{Kx6!`7_tXqG_ef+t(ydAKp?~;dJOYX6F=g#V~pVIBC zRDNXtRJWc|8SV>bd!EvL-}L#E7TFi_?IkT1RgDSTSGMb}ymQ-iPhNk!R>!1saGLxITgC6a%QKJ0RWt zm1cHe_h`|>S~4!zqWP=j=yxvH#^H|RyBz(mrdiqLmpmismH*YU)P4iKnOkbkhI^%d zZutXk@6>Ou1MYjZ=yCrW@2~$CUDf|zsp@lmH?9Rjp8v~z-(<&`ag!XMjr*=u{Qmav z?0)WFxOCZFK@W5LGizEQS;XChE80TYPGOe8&dogAE(&EIi4vYI#Fft} z=#znG^TX~InZ~tJct%2<;*og@p%SBORVb7!j>ibkvWtYrOS&|`{ zc|PtTbnKLRoI=QQ2+BTMM505ql@bTnZzGH~)O5l>eUxapDEAc7j9Q5qQ?i{jGP*h>=rftc;^;Y`Q_k_m>0-2Ie6}QV47dG1mW-$cBFNX^S z{yg>FkonOlaqpoKq6pg1^$6~x@DSJ{Mu;IW6lhctd&nYe5IQtm_zw=JWKqa1Vixuj z2Q4BF-J$of^#_W`KEyzR8IS`aW|)|Un<^s2Wgy^jZ2bg?2=5kJA7Zz8Umj8PRa z2ufvmL3gN}cBDmw7`j}1og4}srS4S;-Pt)>vFIZEnYgo{UT8uYBE&9ic7`$V6l1!a z@uwv+roJz2@{kC1VUNmT;>ai6*}>3ogztr%;14Yq4ycE*Ai8Ba9l}3ch*j^=xA2EP zBdtcO#Vr>$$`g7&6iShCA#`~tWW)7$FBfrQ85E;qFm*8t6}g71;l{ycaF|KgDMT86 z7i58%HYmkiOFuW`CP3(PD0EV9XN@UnxHt%}!XbDARLZ46N|&4`=>MesGLrd;I^T!8 z1nz+%a3MSOVR!Zk=!8Yc7Q@V;v0@?!7j+wkA4CkjUCO}xc&xA?bHx>liXpM0T=Jt> zQ2{B`W3ZD>`$VjWBTkx3`!#x^hKX6x%SDcLx$xk6aL39zqbot`MVC5cn9#tF9#7RU zkpq5o$D=P6nkbtUw~cY#31TDhWnQ%P5-5UYPz(=4Xk#ce7CXcc$J{zhlseJpUmX(acX?(E!8 zyR*M$q#g}*XCJW)6K$JgMQE@LgHP!y$VD`ry(L!m#O${J3WeHMg+k^hh3Cj-y0JM* z_y;-r_@VE4piM%YLy$xG5nMm6hrG>k%rT&X24|^m zDPd#ri!LF=len~36|zmTtRr~tj-m~*1u`??W`%NrNb1gZf(LBi0zVW%0pvmz*zCAq zbFgkox%3Z5#vbJG4y zp(2iYB!B~)kVBeM()dC6nOhw6&B|n9W}Onut3~v`LyTun6*5pk0~^Fa0yw}4DKHkq zv1Bomv|aR)v+y;T>1eM>J;)CSlf~2c|NfltxS}YRHcVuK8C1}~1vW^7I7k2oq`+91 
z2u=txR)dV)CJFvp3I2NAdfZmrR@|Ks9Erc0IbtGtxNuvEi|i1xlf>(SGth?Ytn~jV zR(y?n6897wBVHDH=E5x41^Yk@#wE>B!cTx00Z%iZkj6v$EU{877OSv5^Ot{>n8o~) z12aKjt_0-Ae`FE)#PJVZDO8V~w;vVCCLu1bVk!fHUqsQ5y|_V;vRv2<`9fq}#{32e zAixGG>T+@I?JQ@^c}_X+nV9pD=cZUhHfvCvUxsmz_o zXF>wH9FWOcc1T|fipAMj)4Cw&_>JOqc~2(;DU zheL3b=VNdjPQX!Q$KW`aS>rvhi~8wu3nbtlg+Ng4fPA~I(ZV;V{mEW!^O zs9**mp}pjqCq}GfgoRO}K}EKX5Qx#v0xBan9y#fv>lS^Eoc%W>my z6F`V;%EOw+K0JkTn&}g>nCG+Te=^7>%TD}dv|%wk3_ijZV!KS-eEAFoun@eE3lqq< zO0HA44iht(_i~uOMG?y^HnBxo#1do)^f@7Mk%s5RFiXaTi82mp9E3Z;1v7chhgnbr z9#}|R8}pP8w}5AXyR(ckNq+)%0~OY>XA3|XIOX1lHZ>Q-3axIlYcq1?-8N`_t&_aa5uwD+FB&grxIhu7S{PQ z_?O{2c@EeyNi*9?;eli*2Mk2EPNr z7|Jc7t{~PtB*vCLBoe>_IUvd&5;kx^3eRR-7fh6{44*)H_GO1XON4XU65#+Z&-sLV zAs1#s6>-ZUO{Re=@~VJxI7Qwk;REP|6ChSUB#z-8ft}C_^-v2z*aDlN3M#-4v%mv3 z&_KXZ^c;pZ*a=M#gw3!Cs-ObOp%nb^Ff4;2SOSY7lq|$T+yd}HK6oJ)X2DFz0S}~r z6C98LaiD<;<$m%3KlmUQX2DFz0S`FA0SOQXHqbyo8uP+L`bX|o)-E|OvY*ajZ|%8` zwIG`HBZhSWSKuCM#XZa1A?Hcvn3?QDYgq$r%wwgjeNO4J4yNJST&%me4p7m#n7yu< zbUDmH0(~Bt&n(uibr=FXgV61xZ?QJEfeG1AC}7`{b1h{cd?qN!hJcy4DwGnp914&X zfrq#r{^2Fbm=!4O}o0G9e3`LH1JQy9tiTwYrKqlQPvq1LOoPq6N2&y`BfZ z2iG>5xuAu%<^9+!_k^xw@jK!ihfMm;L^#g;*a=-=vk_n2ak;qTSHtBYkpKC=#)yM| zA1oHH8#IwM=LGrgcbmna|DXy5(qf{(Zz9bfS(4Eegj%SFCV2Bp$&vSCNKzF7QAubW(;c_%vVfY%wEepJ=XUo#c#U zG90(96rJz^oP-mw35doy<4Umwg7Wz)Ec7wuyPh+sU+~_>x+rKH;ZGL9`Q$@hN3iKk z);lqj_j267k>6PAs>!&#EADV9;;?LQ<_Hzn%${x{?-3r^hub)RV4sltIQ@lZXEpu! z6^>?BvtFSuzC2mPA0~fiu{i!AVZUGrBA&`za}-;<(1O19da|7j!K~ml#i-ZS*-@ zr;E0jKzYbtx?PY!d?p1^gzJYY2tq3y1rMlB&NjdXF3RG8(#w{MZVv7|>}zviCitNg z%E1Yt52@1x_EZmI%RNGz=w@l9o?c<(QtkH2(IIndr7rU5K{80Nt@~18n6^n?% zwoz@=`LB%MQfvZgP>Wr+K??8q$AavoDdPvQkp5TzrH~7)lv6CA9<< zw54od5fVjm9uZw0=S1oSHW1?dP{-E^!ZY`N%fu!0C&+?K5bu5-nS$$tG#D#oVz@ZM zxza?=m;C4}l{(*iYO4PZ#tHcs-A)-6k_HlZ?pjP<;97*e@1*bGil5L{-7Cal%6t?$ zVeAh-7KbmcHzztNs%eQBJGVP~i&Dtk!d?;DXt2>ZQz77>`*ij8SATgB6BWOY*Rd*>8wSKipO>f@AgfK=OkyU znazr)fafK+nYc5Nm*aNQZXO7=36Y7dTd~FS^eO+3=tGiaP)MA6h+9NEE~Q;~Zsj=_ zzw18w6lr!s8|;ILlw+ccGd0{4T<5)%k+ebL`tPT$c`oAFhQAB8G$@`{$|4>>28Yn` z^L;whp3l!71f@`T7?y?g13 zwW5mW$DtGUV5bxyD zw6(K4TW%?ivS0i#*5tVioi7oO{>MqKygB*?r#tiqWG7(fA+wmZC&IJv#RyLd<#EDT zFiAtTtqpy>O7L%O$j|;{XOf93MT-qE?2saczy!Y{GGHQ1g8K-|fX(P$NZwrAh_=5o zKcLfN>dyY;K=Yqe=Dzy36i+el_`aVho**uk%(gR@F2Ri?O)l|gfp=(#z63vw@FYOd z7?UR#Sp^(}g&SF`As=SJV#om>=; zU7IXi$i||pNUndh{bBqO;xL2dFz2+KQ?O@nc#1^=WSUp8p05&}#K|WtLIjBy6pCQ( zhr!|z?k*nTuE(Q912>a|NnMYKDBuJk!USgVC9T9dqNhKjWUt~*MCMDGCcz>6o=%JS z8a%^!*Kly22&dpAIFKbk=t(-2N>gd}7$MA2) z4MN&`*bKJ=UsApR!gRWb~^;0&CFui+$!ThT9F#)ekE zN?6&qXdu=ni!|D5EVQxLRgfuiKjT~_T;yZn*}`*{LcMth5OVKmTE&Gn_XyO*)249t zOCJIaA}EgnW{8A1`HUX3d6noQe%m*zL%Gwj!6969e;)*U#iQ>r7vT33)(T$yMKbIo z`jF{SF$^6KeyND%a369L{bVSNf(eiTqhT0CfEz}F0z<$BPOyQ1IP^uLrxhD6z#Yo- zBHW8`7vlcMqwJG#ClfacE`fNs0xWPTTnsAZJBsZzSWDR+B+Z?$4gR|%S*%?{x>D*- z+&D;qiSXaZt|?0vKY?xd6Un!jH1!gm&yu#Bvze3V4|Kn>nl90-K-}c0v=>LOleb06k{xGX%F3*AJUv3v7aNr~o(NE|?5g zz$D0k2`~yq!{$#|ZvvbVKoR(%3S8iUT*yI}2lBxOUKmMQCtL!T!WdXh`Iq84v4Lj> z>vay}iG7BP_%X=uhrtjH!c;6~%J-raA%?kF0$SMbi&e}ct9Zx4mFKmff(AB-gTu_- zCnX8MyFiFLwUepxRcn>Vlxd%atH85L_;}9oGq#xD{8oz?#Qmv1Tkd?A%@{jHa{NFL za-jf@5$`yNdJAVd%oF%Kp&Uw~0;*sWY=$imgj%SFCTNA7unUgD5!eT9a0m`d|6uB? 
zkxx2f9aQ`dD2B0+2KnFvrIY;-t`lq!2O5|`fX76euwPqQ`@D5|2i~IK%BgOlF`NJr6W+)WHI;{cLk2rWq zg!mI}@%j&+2{{^tu;b55+Yu^d4^|EpPz8SWcWrWy7qv<#2kCc^3q`P*@~bJ_v4aLS zNP~P3+_`vhb72-%WbHf!F+AH~D8#{8 zWbwEOpkL^UuS2=l5C?+vSwD1L&HfO&@V7w`ejA9dvAOvyJxGL}j_25(G;NSOlm0{g zRWL4A5v{ntr5m<|gt#t}cMxbIP8HbbXZ7=CKgc1 z@Kjx%J1DzLh_}t8mn$MWK{kfHIw|_L} b4F2C~4_;jUw%c04m4Mu7{r~pwLDT;MxmF=) literal 0 HcmV?d00001 diff --git a/target/linux/ipq807x/base-files/lib/upgrade/platform.sh b/target/linux/ipq807x/base-files/lib/upgrade/platform.sh new file mode 100755 index 0000000000..407db099f6 --- /dev/null +++ b/target/linux/ipq807x/base-files/lib/upgrade/platform.sh @@ -0,0 +1,72 @@ +. /lib/functions/system.sh + +qca_do_upgrade() { + local tar_file="$1" + + local board_dir=$(tar tf $tar_file | grep -m 1 '^sysupgrade-.*/$') + board_dir=${board_dir%/} + local dev=$(find_mtd_chardev "0:HLOS") + + tar Oxf $tar_file ${board_dir}/kernel | mtd write - ${dev} + + if [ -n "$UPGRADE_BACKUP" ]; then + tar Oxf $tar_file ${board_dir}/root | mtd -j "$UPGRADE_BACKUP" write - rootfs + else + tar Oxf $tar_file ${board_dir}/root | mtd write - rootfs + fi +} + +platform_check_image() { + local magic_long="$(get_magic_long "$1")" + board=$(board_name) + case $board in + cig,wf188|\ + cig,wf188n|\ + cig,wf194c|\ + cig,wf194c4|\ + wallys,dr6018|\ + edgecore,eap101|\ + edgecore,eap102|\ + edgecore,eap106|\ + tplink,ex227|\ + tplink,ex447|\ + qcom,ipq6018-cp01|\ + qcom,ipq807x-hk01|\ + qcom,ipq807x-hk14|\ + qcom,ipq5018-mp03.3) + [ "$magic_long" = "73797375" ] && return 0 + ;; + esac + return 1 +} + +platform_do_upgrade() { + CI_UBIPART="rootfs" + CI_ROOTPART="ubi_rootfs" + CI_IPQ807X=1 + + board=$(board_name) + case $board in + cig,wf188) + qca_do_upgrade $1 + ;; + cig,wf188n|\ + cig,wf194c|\ + cig,wf194c4|\ + qcom,ipq6018-cp01|\ + qcom,ipq807x-hk01|\ + qcom,ipq807x-hk14|\ + qcom,ipq5018-mp03.3|\ + wallys,dr6018|\ + tplink,ex447|\ + tplink,ex227) + nand_upgrade_tar "$1" + ;; + edgecore,eap106|\ + edgecore,eap102|\ + edgecore,eap101) + CI_UBIPART="rootfs1" + nand_upgrade_tar "$1" + ;; + esac +} diff --git a/target/linux/ipq807x/config-4.4 b/target/linux/ipq807x/config-4.4 new file mode 100644 index 0000000000..1b4b95b968 --- /dev/null +++ b/target/linux/ipq807x/config-4.4 @@ -0,0 +1,828 @@ +# CONFIG_AHCI_IPQ is not set +CONFIG_ALIGNMENT_TRAP=y +# CONFIG_ALLOW_DEV_COREDUMP is not set +# CONFIG_AMBA_PL08X is not set +# CONFIG_APM_EMULATION is not set +# CONFIG_APQ_GCC_8084 is not set +# CONFIG_APQ_MMCC_8084 is not set +# CONFIG_AR8216_PHY is not set +CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE=y +CONFIG_ARCH_HAS_ELF_RANDOMIZE=y +CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y +CONFIG_ARCH_HAS_SG_CHAIN=y +CONFIG_ARCH_HAS_TICK_BROADCAST=y +CONFIG_ARCH_HAVE_CUSTOM_GPIO_H=y +# CONFIG_ARCH_IPQ40XX is not set +# CONFIG_ARCH_IPQ806x is not set +# CONFIG_ARCH_IPQ807x is not set +# CONFIG_ARCH_IPQ6018 is not set +# CONFIG_ARCH_IPQ5018 is not set +CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y +# CONFIG_ARCH_MSM8960 is not set +# CONFIG_ARCH_MSM8974 is not set +CONFIG_ARCH_MSM8X60=y +CONFIG_ARCH_MULTIPLATFORM=y +# CONFIG_ARCH_MULTI_CPU_AUTO is not set +CONFIG_ARCH_MULTI_V6_V7=y +CONFIG_ARCH_MULTI_V7=y +CONFIG_ARCH_NR_GPIO=0 +CONFIG_ARCH_QCOM=y +CONFIG_QSEECOM=m +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set +CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y +CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y +CONFIG_ARCH_SUPPORTS_UPROBES=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_ARCH_USE_BUILTIN_BSWAP=y +CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y +CONFIG_ARCH_WANT_GENERAL_HUGETLB=y 
+CONFIG_ARCH_WANT_IPC_PARSE_VERSION=y +CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y +CONFIG_ARM=y +CONFIG_ARM_AMBA=y +CONFIG_ARM_ARCH_TIMER=y +CONFIG_ARM_ARCH_TIMER_EVTSTREAM=y +# CONFIG_ARM_ATAG_DTB_COMPAT is not set +CONFIG_ARM_CCI=y +CONFIG_ARM_CCI400_COMMON=y +CONFIG_ARM_CCI400_PMU=y +CONFIG_ARM_CCI_PMU=y +CONFIG_ARM_CPU_SUSPEND=y +CONFIG_ARM_GIC=y +CONFIG_ARM_HAS_SG_CHAIN=y +# CONFIG_ARM_HIGHBANK_CPUIDLE is not set +CONFIG_ARM_CPUIDLE=y +CONFIG_ARM_L1_CACHE_SHIFT=6 +CONFIG_ARM_L1_CACHE_SHIFT_6=y +# CONFIG_ARM_LPAE is not set +CONFIG_ARM_MODULE_PLTS=y +CONFIG_ARM_PATCH_PHYS_VIRT=y +CONFIG_ARM_PMU=y +CONFIG_ARM_PSCI=y +CONFIG_ARM_PSCI_FW=y +CONFIG_ARM_QCOM_CPUFREQ=y +# CONFIG_ARM_SMMU is not set +# CONFIG_ARM_SP805_WATCHDOG is not set +CONFIG_ARM_THUMB=y +# CONFIG_ARM_THUMBEE is not set +CONFIG_ARM_UNWIND=y +CONFIG_ARM_VIRT_EXT=y +CONFIG_AT803X_PHY=y +# CONFIG_ATA is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NVME=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +# CONFIG_VIRTIO_BLK is not set +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_BOUNCE=y +CONFIG_BUILD_BIN2C=y +# CONFIG_CNSS_QCN9000 is not set +# CONFIG_CNSS2 is not set +# CONFIG_CNSS2_GENL is not set +# CONFIG_CNSS2_DEBUG is not set +# CONFIG_CNSS2_PM is not set +# CONFIG_CNSS2_PCI_DRIVER is not set +# CONFIG_CNSS2_CALIBRATION_SUPPORT is not set +# CONFIG_CNSS2_SMMU is not set +# CONFIG_CNSS2_RAMDUMP is not set +# CONFIG_CACHE_L2X0 is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_CC_STACKPROTECTOR=y +# CONFIG_CC_STACKPROTECTOR_NONE is not set +CONFIG_CC_STACKPROTECTOR_REGULAR=y +# CONFIG_CHARGER_QCOM_SMBB is not set +CONFIG_CLEANCACHE=y +CONFIG_CLKDEV_LOOKUP=y +CONFIG_CLKSRC_OF=y +CONFIG_CLKSRC_PROBE=y +CONFIG_CLKSRC_QCOM=y +CONFIG_CLONE_BACKWARDS=y +CONFIG_COMMON_CLK=y +CONFIG_COMMON_CLK_QCOM=y +CONFIG_CONFIGFS_FS=y +CONFIG_COREDUMP=y +# CONFIG_CORESIGHT is not set +# CONFIG_CORESIGHT_CSR is not set +# CONFIG_CORESIGHT_CTI is not set +# NFIG_CORESIGHT_EVENT is not set +# CONFIG_CORESIGHT_HWEVENT is not set +# CONFIG_CORESIGHT_LINKS_AND_SINKS is not set +# CONFIG_CORESIGHT_LINK_AND_SINK_TMC is not set +# CONFIG_CORESIGHT_QCOM_REPLICATOR is not set +# CONFIG_CORESIGHT_QPDI is not set +# CONFIG_CORESIGHT_SINK_ETBV10 is not set +# CONFIG_CORESIGHT_SINK_TPIU is not set +# CONFIG_CORESIGHT_SOURCE_DUMMY is not set +# CONFIG_CORESIGHT_SOURCE_ETM3X is not set +# CONFIG_CORESIGHT_SOURCE_ETM4X is not set +# CONFIG_CORESIGHT_REMOTE_ETM is not set +# CONFIG_CORESIGHT_STM is not set +# CONFIG_CORESIGHT_TPDA is not set +# CONFIG_CORESIGHT_TPDM is not set +# CONFIG_CORESIGHT_TPDM_DEFAULT_ENABLE is not set +# CONFIG_CORESIGHT_STREAM is not set +CONFIG_CPUFREQ_DT=y +CONFIG_CPUFREQ_DT_PLATDEV=y +CONFIG_CPU_32v6K=y +CONFIG_CPU_32v7=y +CONFIG_CPU_ABRT_EV7=y +# CONFIG_CPU_BIG_ENDIAN is not set +# CONFIG_CPU_BPREDICT_DISABLE is not set +CONFIG_CPU_CACHE_V7=y +CONFIG_CPU_CACHE_VIPT=y +CONFIG_CPU_COPY_V6=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y +# CONFIG_CPU_SW_DOMAIN_PAN is not set +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_COMMON=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_HAS_ASID=y +# CONFIG_CPU_ICACHE_DISABLE is not set +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y +CONFIG_CPU_PABRT_V7=y +CONFIG_CPU_PM=y +CONFIG_CPU_RMAP=y +# 
CONFIG_CPU_THERMAL is not set +CONFIG_CPU_TLB_V7=y +CONFIG_CPU_V7=y +CONFIG_CRC16=y +# CONFIG_CRC32_SARWATE is not set +CONFIG_CRC32_SLICEBY8=y +CONFIG_CROSS_MEMORY_ATTACH=y +# CONFIG_CRYPTO_DEV_QCOM_MSM_QCE is not set +# CONFIG_CRYPTO_DEV_OTA_CRYPTO is not set +# CONFIG_FIPS_ENABLE is not set +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_CCM=y +CONFIG_CRYPTO_CRC32C=y +CONFIG_CRYPTO_CTR=y +CONFIG_CRYPTO_DEFLATE=y +CONFIG_CRYPTO_CMAC=y +# CONFIG_CRYPTO_DEV_QCOM_ICE is not set +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_CRYPTO_ECB=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_HASH_INFO=y +CONFIG_CRYPTO_HW=y +CONFIG_CRYPTO_LZO=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +CONFIG_CRYPTO_NULL=y +CONFIG_CRYPTO_RNG=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_SHA256=y +CONFIG_CRYPTO_WORKQUEUE=y +CONFIG_CRYPTO_XZ=y +CONFIG_CRYPTO_ARC4=y +CONFIG_CRYPTO_GCM=y +CONFIG_DCACHE_WORD_ACCESS=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_GPIO=y +# CONFIG_DEBUG_INFO_REDUCED is not set +CONFIG_DEBUG_LL_INCLUDE="mach/debug-macro.S" +# CONFIG_DEBUG_MEM_USAGE is not set +# CONFIG_DEBUG_UART_8250 is not set +# CONFIG_DEBUG_USER is not set +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DEVMEM=y +# CONFIG_DIAG_OVER_USB is not set +CONFIG_DMADEVICES=y +CONFIG_DMA_ENGINE=y +CONFIG_DMA_OF=y +CONFIG_DMA_VIRTUAL_CHANNELS=y +CONFIG_DTC=y +# CONFIG_DWMAC_GENERIC is not set +# CONFIG_DWMAC_IPQ806X is not set +# CONFIG_DWMAC_SUNXI is not set +# CONFIG_DW_DMAC_PCI is not set +# CONFIG_VHOST_NET is not set +# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set +CONFIG_DYNAMIC_DEBUG=y +CONFIG_ETHERNET_PACKET_MANGLE=y +CONFIG_EXT4_FS=y +# CONFIG_EXT4_USE_FOR_EXT2 is not set +CONFIG_FB=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_IMAGEBLIT=y +CONFIG_FB_CMDLINE=y +CONFIG_FB_QCOM_QPIC=y +CONFIG_FB_QCOM_QPIC_ER_SSD1963_PANEL=y +CONFIG_FB_SYS_FOPS=y +CONFIG_FIXED_PHY=y +CONFIG_FIX_EARLYCON_MEM=y +CONFIG_FS_MBCACHE=y +CONFIG_GENERIC_ALLOCATOR=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +# CONFIG_GENERIC_CPUFREQ_KRAIT is not set +CONFIG_GENERIC_IDLE_POLL_SETUP=y +CONFIG_GENERIC_IO=y +CONFIG_GENERIC_IRQ_SHOW=y +CONFIG_GENERIC_IRQ_SHOW_LEVEL=y +CONFIG_GENERIC_MSI_IRQ=y +CONFIG_GENERIC_PCI_IOMAP=y +CONFIG_GENERIC_PHY=y +CONFIG_GENERIC_PINCONF=y +CONFIG_GENERIC_SCHED_CLOCK=y +CONFIG_GENERIC_SMP_IDLE_THREAD=y +CONFIG_GENERIC_STRNCPY_FROM_USER=y +CONFIG_GENERIC_STRNLEN_USER=y +CONFIG_GENERIC_TIME_VSYSCALL=y +CONFIG_GPIOLIB=y +CONFIG_GPIOLIB_IRQCHIP=y +CONFIG_GPIO_DEVRES=y +# CONFIG_GPIO_LATCH is not set +# CONFIG_GPIO_NXP_74HC153 is not set +CONFIG_GPIO_SYSFS=y +CONFIG_HANDLE_DOMAIN_IRQ=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_HAS_DMA=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT_MAP=y +# CONFIG_HAVE_64BIT_ALIGNED_ACCESS is not set +CONFIG_HAVE_ARCH_AUDITSYSCALL=y +CONFIG_HAVE_ARCH_BITREVERSE=y +CONFIG_HAVE_ARCH_JUMP_LABEL=y +CONFIG_HAVE_ARCH_KGDB=y +CONFIG_HAVE_ARCH_PFN_VALID=y +CONFIG_HAVE_ARCH_SECCOMP_FILTER=y +CONFIG_HAVE_ARCH_TRACEHOOK=y +CONFIG_HAVE_ARM_ARCH_TIMER=y +# CONFIG_HAVE_BOOTMEM_INFO_NODE is not set +CONFIG_HAVE_BPF_JIT=y +CONFIG_HAVE_CC_STACKPROTECTOR=y +CONFIG_HAVE_CLK=y +CONFIG_HAVE_CLK_PREPARE=y +CONFIG_HAVE_CONTEXT_TRACKING=y +CONFIG_HAVE_C_RECORDMCOUNT=y +CONFIG_HAVE_DEBUG_KMEMLEAK=y +CONFIG_HAVE_DMA_API_DEBUG=y +CONFIG_HAVE_DMA_ATTRS=y +CONFIG_HAVE_DMA_CONTIGUOUS=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y 
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_TRACER=y +# CONFIG_SRD_TRACE is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_HAVE_HW_BREAKPOINT=y +CONFIG_HAVE_IDE=y +CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y +CONFIG_HAVE_KERNEL_GZIP=y +CONFIG_HAVE_KERNEL_LZ4=y +CONFIG_HAVE_KERNEL_LZMA=y +CONFIG_HAVE_KERNEL_LZO=y +CONFIG_HAVE_KERNEL_XZ=y +# CONFIG_HAVE_KPROBES is not set +# CONFIG_HAVE_KRETPROBES is not set +CONFIG_HAVE_MEMBLOCK=y +CONFIG_HAVE_MOD_ARCH_SPECIFIC=y +CONFIG_HAVE_NET_DSA=y +CONFIG_HAVE_OPROFILE=y +# CONFIG_HAVE_OPTPROBES is not set +CONFIG_HAVE_PERF_EVENTS=y +CONFIG_HAVE_PERF_REGS=y +CONFIG_HAVE_PERF_USER_STACK_DUMP=y +CONFIG_HAVE_PROC_CPU=y +CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y +CONFIG_HAVE_SMP=y +CONFIG_HAVE_SYSCALL_TRACEPOINTS=y +CONFIG_HAVE_UID16=y +CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y +CONFIG_HIGHMEM=y +CONFIG_HIGHPTE=y +CONFIG_HOTPLUG_CPU=y +CONFIG_HWMON=y +CONFIG_HWSPINLOCK=y +CONFIG_HWSPINLOCK_QCOM=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_MSM=y +CONFIG_HZ_FIXED=0 +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_COMPAT=y +CONFIG_I2C_HELPER_AUTO=y +CONFIG_I2C_QUP=y +# CONFIG_IIO is not set +# CONFIG_IIO_BUFFER is not set +# CONFIG_IIO_TRIGGER is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_INPUT_PM8941_PWRKEY is not set +CONFIG_IOMMU_HELPER=y +# CONFIG_IOMMU_IO_PGTABLE_LPAE is not set +# CONFIG_IPQ_DWC3_QTI_EXTCON is not set +# CONFIG_IPQ_GCC_4019 is not set +# CONFIG_IPQ_GCC_5018 is not set +# CONFIG_IPQ_APSS_5018 is not set +# CONFIG_IPQ_GCC_6018 is not set +# CONFIG_IPQ_APSS_6018 is not set +# CONFIG_IPQ_GCC_806X is not set +# CONFIG_IPQ_ADSS_807x is not set +# CONFIG_IPQ_APSS_807x is not set +# CONFIG_IPQ_GCC_807x is not set +# CONFIG_IPQ_ADCC_4019 is not set +# CONFIG_IPQ_LCC_806X is not set +# CONFIG_IPQ_REMOTEPROC_ADSP is not set +# CONFIG_IPQ_SUBSYSTEM_RESTART is not set +# CONFIG_IPQ_SUBSYSTEM_RESTART_TEST is not set +CONFIG_IRQCHIP=y +CONFIG_IRQ_DOMAIN=y +CONFIG_IRQ_DOMAIN_HIERARCHY=y +CONFIG_IRQ_FORCED_THREADING=y +CONFIG_IRQ_WORK=y +CONFIG_JBD2=y +# CONFIG_IPC_ROUTER is not set +# CONFIG_IPC_ROUTER_SECURITY is not set +# CONFIG_IPC_LOGGING is not set +CONFIG_KPSS_XCC=y +# CONFIG_KRAITCC is not set +# CONFIG_KRAIT_CLOCKS is not set +# CONFIG_KRAIT_L2_ACCESSORS is not set +CONFIG_LEDS_IPQ=y +CONFIG_LEDS_TLC591XX=y +# CONFIG_LEDS_PCA9956B is not set +CONFIG_LIBFDT=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_LOCK_SPIN_ON_OWNER=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +CONFIG_MDIO=y +CONFIG_MDIO_BITBANG=y +CONFIG_MDIO_BOARDINFO=y +CONFIG_MDIO_GPIO=y +# CONFIG_MDIO_QCA is not set +CONFIG_MFD_QCOM_RPM=y +CONFIG_MFD_SPMI_PMIC=y +# CONFIG_SLIMBUS is not set +# CONFIG_SLIMBUS_MSM_CTRL is not set +# CONFIG_SLIMBUS_MSM_NGD is not set +# CONFIG_OF_SLIMBUS is not set +CONFIG_MFD_SYSCON=y +CONFIG_MIGHT_HAVE_CACHE_L2X0=y +CONFIG_MIGHT_HAVE_PCI=y +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y +CONFIG_MMC_BLOCK=y +CONFIG_MMC_BLOCK_MINORS=32 +CONFIG_MMC_QCOM_DML=y +CONFIG_MMC_QCOM_TUNING=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_MSM=y +# CONFIG_MMC_SDHCI_OF_ARASAN is not set +# CONFIG_MMC_SDHCI_PCI is not set +CONFIG_MMC_SDHCI_PLTFM=y +# CONFIG_MMC_TIFM_SD is not set +CONFIG_MODULES_TREE_LOOKUP=y +CONFIG_MODULES_USE_ELF_REL=y +# CONFIG_MPLS_ROUTING is not set +# CONFIG_MSM_GCC_8660 is not set +# CONFIG_MSM_GCC_8916 is not set +# CONFIG_MSM_GCC_8960 is not set +# CONFIG_MSM_GCC_8974 is not set +# CONFIG_MSM_LCC_8960 is not set +# CONFIG_MSM_MMCC_8960 is not set +# CONFIG_MSM_MMCC_8974 is not set +# CONFIG_MSM_MHI 
is not set +# CONFIG_MSM_IPC_ROUTER_MHI_XPRT is not set +# CONFIG_MSM_MHI_DEBUG is not set +# CONFIG_MSM_MHI_DEV is not set +# CONFIG_MSM_MHI_UCI is not set +# CONFIG_DIAGFWD_BRIDGE_CODE is not set +# CONFIG_MSM_BUS_SCALING is not set +# CONFIG_BUS_TOPOLOGY_ADHOC is not set +# CONFIG_QPNP_REVID is not set +# CONFIG_SPS is not set +# CONFIG_SPS_SUPPORT_NDP_BAM is not set +# CONFIG_USB_BAM is not set +# CONFIG_SPS_SUPPORT_BAMDMA is not set +# CONFIG_IPA is not set +# CONFIG_IPA3 is not set +# CONFIG_EP_PCIE is not set +# CONFIG_GSI is not set +# CONFIG_PFT is not set +# CONFIG_SEEMP_CORE is not set +# CONFIG_GPIO_USB_DETECT is not set +# CONFIG_MSM_GLINK is not set +# CONFIG_MSM_GLINK_LOOPBACK_SERVER is not set +# CONFIG_MSM_GLINK_SMEM_NATIVE_XPRT is not set +# CONFIG_MSM_GLINK_PKT is not set +# CONFIG_MSM_IPC_ROUTER_GLINK_XPRT is not set +# CONFIG_MSM_QMI_INTERFACE is not set +# CONFIG_MSM_TEST_QMI_CLIENT is not set +# CONFIG_GLINK_DEBUG_FS is not set +# CONFIG_MSM_RPM_SMD is not set +# CONFIG_MSM_RPM_GLINK is not set +CONFIG_MSM_RPM_LOG=y +# CONFIG_MSM_SMEM is not set +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_M25P80=y +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_ECC=y +CONFIG_MTD_NAND_QCOM=y +CONFIG_MTD_QCOM_SMEM_PARTS=y +CONFIG_MTD_SPINAND_GIGADEVICE=y +CONFIG_MTD_SPINAND_MT29F=y +CONFIG_MTD_SPINAND_ONDIEECC=y +CONFIG_MTD_SPI_NOR=y +CONFIG_MTD_SPLIT_FIRMWARE=y +CONFIG_MTD_SPLIT_FIT_FW=y +CONFIG_MTD_UBI=y +CONFIG_MTD_UBI_BEB_LIMIT=20 +CONFIG_MTD_UBI_BLOCK=y +# CONFIG_MTD_UBI_FASTMAP is not set +CONFIG_MTD_UBI_GLUEBI=y +CONFIG_MTD_UBI_WL_THRESHOLD=4096 +CONFIG_MULTI_IRQ_HANDLER=y +CONFIG_MUTEX_SPIN_ON_OWNER=y +CONFIG_NEED_DMA_MAP_STATE=y +CONFIG_NEON=y +CONFIG_NET=y +# CONFIG_NET_DSA_MV88E6063 is not set +CONFIG_NET_FLOW_LIMIT=y +CONFIG_NET_PTP_CLASSIFY=y +CONFIG_NO_BOOTMEM=y +CONFIG_NO_HZ=y +CONFIG_NO_HZ_COMMON=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=4 +CONFIG_NUM_ALT_PARTITION=8 +CONFIG_NVMEM=y +CONFIG_OF=y +CONFIG_OF_ADDRESS=y +CONFIG_OF_ADDRESS_PCI=y +CONFIG_OF_EARLY_FLATTREE=y +CONFIG_OF_FLATTREE=y +CONFIG_OF_GPIO=y +CONFIG_OF_IRQ=y +CONFIG_OF_MDIO=y +CONFIG_OF_MTD=y +CONFIG_OF_NET=y +CONFIG_OF_PCI=y +CONFIG_OF_PCI_IRQ=y +CONFIG_OF_RESERVED_MEM=y +CONFIG_OLD_SIGACTION=y +CONFIG_OLD_SIGSUSPEND3=y +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_PANIC_ON_OOPS=y +CONFIG_PANIC_ON_OOPS_VALUE=1 +CONFIG_PANIC_TIMEOUT=5 +CONFIG_PCI=y +# CONFIG_PCIEAER is not set +CONFIG_PCIE_DW=y +# CONFIG_PCIE_DW_PLAT is not set +CONFIG_PCIE_PME=y +CONFIG_PCIE_QCOM=y +CONFIG_PCI_DOMAINS=y +CONFIG_PCI_DOMAINS_GENERIC=y +CONFIG_PCI_MSI=y +CONFIG_PERF_EVENTS=y +CONFIG_PERF_USE_VMALLOC=y +CONFIG_PGTABLE_LEVELS=2 +CONFIG_PHYLIB=y +# CONFIG_PHY_IPQ_BALDUR_USB is not set +# CONFIG_PHY_IPQ_UNIPHY_USB is not set +# CONFIG_PHY_QCOM_APQ8064_SATA is not set +# CONFIG_PHY_QCOM_IPQ806X_SATA is not set +CONFIG_PHY_QCA_PCIE_QMP=y +# CONFIG_PHY_QCOM_UFS is not set +# CONFIG_PHY_IPQ_UNIPHY_PCIE is not set +CONFIG_PINCTRL=y +# CONFIG_PINCTRL_APQ8064 is not set +# CONFIG_PINCTRL_APQ8084 is not set +# CONFIG_PINCTRL_IPQ4019 is not set +# CONFIG_PINCTRL_IPQ6018 is not set +# CONFIG_PINCTRL_IPQ8064 is not set +# CONFIG_PINCTRL_IPQ807x is not set +# CONFIG_PINCTRL_IPQ5018 is not set +CONFIG_PINCTRL_MSM=y +# CONFIG_PINCTRL_MSM8660 is not set +# CONFIG_PINCTRL_MSM8916 is not set +# CONFIG_PINCTRL_MSM8960 is not set +CONFIG_PINCTRL_QCOM_SPMI_PMIC=y +# CONFIG_PINCTRL_QCOM_SSBI_PMIC is not set +# CONFIG_PL330_DMA is not set +CONFIG_PM=y +CONFIG_PM_CLK=y +# CONFIG_PM_DEBUG is not set +CONFIG_PM_GENERIC_DOMAINS=y +CONFIG_PM_GENERIC_DOMAINS_OF=y 
+CONFIG_PM_GENERIC_DOMAINS_SLEEP=y +CONFIG_PM_OPP=y +CONFIG_PM_SLEEP=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_MSM=y +CONFIG_POWER_SUPPLY=y +CONFIG_PREEMPT=y +CONFIG_PREEMPT_COUNT=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_RCU=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_PAGE_MONITOR=y +# CONFIG_PROC_STRIPPED is not set +CONFIG_PSTORE=y +CONFIG_PSTORE_RAM=y +# CONFIG_PSTORE_CONSOLE is not set +# CONFIG_PSTORE_PMSG is not set +CONFIG_PWM=y +# CONFIG_PWM_IPQ4019 is not set +# CONFIG_PWM_PCA9685 is not set +CONFIG_PWM_SYSFS=y +CONFIG_QCOM_ADM=y +# CONFIG_QCOM_APM is not set +CONFIG_QCOM_BAM_DMA=y +# CONFIG_QTI_BT_TTY is not set +# CONFIG_QCOM_COINCELL is not set +# CONFIG_QCOM_DCC is not set +CONFIG_QCOM_GDSC=y +CONFIG_QCOM_GSBI=y +# CONFIG_QCOM_HFPLL is not set +# CONFIG_QCOM_MEMORY_DUMP_V2 is not set +# CONFIG_QCOM_MDT_LOADER is not set +CONFIG_QCOM_QFPROM=y +# CONFIG_QCOM_SPMI_TEMP_ALARM is not set +CONFIG_QCOM_RPM_CLK=y +# CONFIG_QCOM_RTB is not set +# CONFIG_QCOM_PM is not set +CONFIG_QCOM_SCM=y +CONFIG_QCOM_SCM_32=y +# CONFIG_HAVE_ARM_SMCCC is not set +CONFIG_QCA_SCM_RESTART_REASON=y +CONFIG_IPQ_TCSR=y +CONFIG_QCOM_QFPROM=y +# CONFIG_QCOM_SMD is not set +CONFIG_QCOM_SMEM=y +CONFIG_QCOM_SMEM_STATE=y +# CONFIG_QCOM_SMD is not set +CONFIG_QCOM_SMP2P=y +# CONFIG_QCOM_SPMI_VADC is not set +CONFIG_QCOM_TSENS=y +CONFIG_QCOM_TZ_LOG=y +CONFIG_QCOM_WDT=y +CONFIG_QMI_ENCDEC=y +CONFIG_RATIONAL=y +# CONFIG_RCU_BOOST is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=21 +# CONFIG_RCU_EXPERT is not set +CONFIG_RCU_STALL_COMMON=y +CONFIG_RD_GZIP=y +CONFIG_REGMAP=y +# CONFIG_REGMAP_ALLOW_WRITE_DEBUGFS is not set +CONFIG_REGMAP_MMIO=y +CONFIG_REGULATOR=y +# CONFIG_REGULATOR_CPR3 is not set +CONFIG_REGULATOR_GPIO=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_QCOM_RPM=y +CONFIG_REGULATOR_QCOM_SPMI=y +# CONFIG_REGULATOR_IPQ40XX is not set +# CONFIG_REGULATOR_RPM_SMD is not set +# CONFIG_REGULATOR_RPM_GLINK is not set +CONFIG_RELAY=y +CONFIG_REMOTEPROC=y +# CONFIG_IPQ807X_REMOTEPROC is not set +CONFIG_RESET_CONTROLLER=y +CONFIG_RFS_ACCEL=y +CONFIG_RPS=y +CONFIG_RTC_CLASS=y +CONFIG_NET_L3_MASTER_DEV=y +# CONFIG_RTC_DRV_CMOS is not set +# CONFIG_RTC_DRV_PM8XXX is not set +CONFIG_RWSEM_SPIN_ON_OWNER=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_SATA_AHCI is not set +CONFIG_SCHED_HRTICK=y +# CONFIG_SCSI is not set +# CONFIG_SCHED_INFO is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SERIAL_8250 is not set +# CONFIG_SERIAL_8250_CONSOLE is not set +# CONFIG_SERIAL_8250_DMA is not set +# CONFIG_SERIAL_AMBA_PL010 is not set +# CONFIG_SERIAL_AMBA_PL011 is not set +CONFIG_SERIAL_MSM=y +CONFIG_SERIAL_MSM_CONSOLE=y +# CONFIG_VIRTIO_CONSOLE is not set +CONFIG_SMP=y +CONFIG_SMP_ON_UP=y +# CONFIG_SND is not set +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_MAX_CARDS=32 +CONFIG_SND_PROC_FS=y +# CONFIG_SND_COMPRESS_OFFLOAD is not set +CONFIG_SND_PCM=y +CONFIG_SND_SOC=y +# CONFIG_SND_SOC_APQ8016_SBC is not set +CONFIG_SND_SOC_I2C_AND_SPI=y +CONFIG_SND_SOC_IPQ=y +# CONFIG_SND_SOC_IPQ806X_LPAIF is not set +# CONFIG_SND_SOC_IPQ806X_PCM_RAW is not set +CONFIG_SND_SOC_IPQ_ADSS=y +CONFIG_SND_SOC_IPQ_CODEC=y +CONFIG_SND_SOC_IPQ_CPU_DAI=y +CONFIG_SND_SOC_IPQ_MBOX=y +CONFIG_SND_SOC_IPQ_PCM_I2S=y +CONFIG_SND_SOC_IPQ_PCM_RAW=y +CONFIG_SND_SOC_IPQ_PCM_SPDIF=y +CONFIG_SND_SOC_IPQ_PCM_TDM=y +CONFIG_SND_SOC_IPQ_STEREO=y +CONFIG_SND_SOC_QCOM=y +# CONFIG_SND_SOC_STORM is not set +CONFIG_SOUND=y +CONFIG_SPARSE_IRQ=y +CONFIG_SPI=y +CONFIG_SPI_MASTER=y +CONFIG_SPI_QUP=y +CONFIG_SPI_SPIDEV=y +# CONFIG_SPI_VSC7385 is not set 
+CONFIG_SPMI=y +CONFIG_SPMI_MSM_PMIC_ARB=y +CONFIG_SRCU=y +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_STOPWATCH is not set +CONFIG_SUSPEND=y +CONFIG_SWCONFIG=y +CONFIG_SWIOTLB=y +# CONFIG_SWAP is not set +CONFIG_SWP_EMULATE=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_THERMAL=y +CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y +CONFIG_THERMAL_GOV_STEP_WISE=y +CONFIG_THERMAL_GOV_USER_SPACE=y +CONFIG_THERMAL_HWMON=y +CONFIG_THERMAL_OF=y +CONFIG_THERMAL_WRITABLE_TRIPS=y +# CONFIG_THUMB2_KERNEL is not set +# CONFIG_TICK_CPU_ACCOUNTING is not set +CONFIG_IRQ_TIME_ACCOUNTING=y +CONFIG_TRACING_EVENTS_GPIO=y +CONFIG_UBIFS_FS=y +CONFIG_UBIFS_FS_ADVANCED_COMPR=y +CONFIG_UBIFS_FS_LZO=y +CONFIG_UBIFS_FS_XZ=y +CONFIG_UBIFS_FS_ZLIB=y +CONFIG_UEVENT_HELPER_PATH="" +CONFIG_UNCOMPRESS_INCLUDE="debug/uncompress.h" +CONFIG_UNINLINE_SPIN_UNLOCK=y +CONFIG_USB_GADGET=n +CONFIG_USB_SUPPORT=y +# CONFIG_USB_DWC3_OF_SIMPLE is not set +# CONFIG_USB_QCOM_8X16_PHY is not set +# CONFIG_USB_QCOM_KS_BRIDGE is not set +# CONFIG_USB_QCOM_QUSB_PHY is not set +# CONFIG_USB_QCOM_QMP_PHY is not set +# CONFIG_USB_QCA_M31_PHY is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_OHCI_LITTLE_ENDIAN is not set +CONFIG_USE_OF=y +CONFIG_VDSO=y +CONFIG_VECTORS_BASE=0xffff0000 +CONFIG_VFP=y +CONFIG_VFPv3=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_WATCHDOG_CORE=y +# CONFIG_WL_TI is not set +# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set +CONFIG_XPS=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_BCJ=y +CONFIG_ZBOOT_ROM_BSS=0 +CONFIG_ZBOOT_ROM_TEXT=0 +CONFIG_ZLIB_DEFLATE=y +CONFIG_ZLIB_INFLATE=y +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_QCOM_CACHE_DUMP=y +CONFIG_QCOM_CACHE_DUMP_ON_PANIC=y +# CONFIG_QCOM_RESTART_REASON is not set +# CONFIG_QCOM_DLOAD_MODE is not set +CONFIG_FW_AUTH=y +CONFIG_FW_AUTH_TEST=m +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_PUBLIC_KEY_ALGO_RSA=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=n +CONFIG_KEYS=y +# CONFIG_SKB_RECYCLER is not set +CONFIG_SKB_RECYCLER_MULTI_CPU=y +# CONFIG_SKB_RECYCLER_PREALLOC is not set +# CONFIG_U_SERIAL_CONSOLE is not set +CONFIG_SCSI_SCAN_ASYNC=y +# CONFIG_NF_IPV6_DUMMY_HEADER is not set +# CONFIG_RMNET is not set +# CONFIG_RMNET_DATA is not set +# CONFIG_RMNET_CTL is not set +# CONFIG_MSM_SECURE_BUFFER is not set +# CONFIG_STAGING is not set +# CONFIG_ANDROID is not set +# CONFIG_ION is not set +# CONFIG_ION_DUMMY is not set +# CONFIG_ION_MSM is not set +# CONFIG_ION_TEST is not set +# CONFIG_CMA is not set +# CONFIG_CMA_DEBUG is not set +# CONFIG_CMA_DEBUGFS is not set +# CONFIG_DMA_CMA is not set +# CONFIG_CMA_AREAS is not set +# CONFIG_CMA_SIZE_MBYTES is not set +# CONFIG_CMA_SIZE_SEL_MBYTES is not set +# CONFIG_CMA_SIZE_SEL_PERCENTAGE is not set +# CONFIG_CMA_SIZE_SEL_MIN is not set +# CONFIG_CMA_SIZE_SEL_MAX is not set +# CONFIG_CMA_ALIGNMENT is not set +# CONFIG_ASHMEM is not set +# CONFIG_ANDROID_TIMED_OUTPUT is not set +# CONFIG_ANDROID_LOW_MEMORY_KILLER is not set +# CONFIG_SYNC is not set +# CONFIG_SW_SYNC is not set +# CONFIG_FSL_MC_BUS is not set +# CONFIG_ALLOC_BUFFERS_IN_4K_CHUNKS is not set +CONFIG_ALLOC_SKB_PAGE_FRAG_DISABLE=y +# CONFIG_MMAP_ALLOW_UNINITIALIZED is not set +# CONFIG_MAILBOX is not set +# CONFIG_MAILBOX_TEST is not set +# CONFIG_QCOM_APCS_IPC is not set +# CONFIG_QCOM_GLINK_SSR is not set +# CONFIG_QCOM_Q6V5_WCSS is not set +# CONFIG_QCOM_SYSMON is not set +# CONFIG_QRTR is not set +# CONFIG_QRTR_SMD is not set +# CONFIG_QRTR_TUN is not set +# CONFIG_RPMSG is not set +# 
CONFIG_RPMSG_QCOM_GLINK_RPM is not set +# CONFIG_RPMSG_VIRTIO is not set +# CONFIG_RPMSG_CHAR is not set +# CONFIG_RPMSG_QCOM_GLINK_SMEM is not set +# CONFIG_RPMSG_QCOM_SMD is not set +CONFIG_QCA_MINIDUMP=y +# CONFIG_QCA_MINIDUMP_DEBUG is not set +# CONFIG_QRTR_USB is not set +# CONFIG_QRTR_FIFO is not set +CONFIG_QRTR_MHI=y +CONFIG_MHI_BUS=y +# CONFIG_MHI_QTI is not set +# CONFIG_MHI_NETDEV is not set +# CONFIG_MHI_DEBUG is not set +# CONFIG_MHI_UCI is not set +# CONFIG_MHI_SATELLITE is not set +# CONFIG_DIAG_OVER_QRTR is not set +# CONFIG_MSM_ADSPRPC is not set +CONFIG_CRYPTO_MICHAEL_MIC=y +# CONFIG_ARCH_HAS_KCOV is not set +# CONFIG_KCOV is not set +# CONFIG_GCC_PLUGINS is not set +# CONFIG_QTI_Q6V5_ADSP is not set +# CONFIG_MSM_RPM_RPMSG is not set +# CONFIG_RPMSG_QCOM_GLINK_RPM is not set +# CONFIG_REGULATOR_RPM_GLINK is not set +# CONFIG_MTD_NAND_SERIAL is not set +# CONFIG_ARM_QTI_IPQ60XX_CPUFREQ is not set +# CONFIG_PAGE_SCOPE_MULTI_PAGE_READ is not set +# CONFIG_CRYPTO_NO_ZERO_LEN_HASH is not set +# CONFIG_CRYPTO_DISABLE_AES192_TEST is not set +# CONFIG_QTI_EUD is not set +# CONFIG_EUD_EXTCON_SUPPORT is not set +# CONFIG_CLK_TEST_5018 is not set +CONFIG_MAP_E_SUPPORT=y +# CONFIG_IPQ_FLASH_16M_PROFILE is not set +# CONFIG_QGIC2_MSI is not set +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_LEDS_GPIO=y +# CONFIG_ARCH_IPQ256M is not set +CONFIG_SKB_FIXED_SIZE_2K=y +# CONFIG_IPQ_MEM_PROFILE is not set +# CONFIG_VIRTIO_NET is not set +# CONFIG_QCA_85XX_SWITCH is not set +CONFIG_AQ_PHY=y +CONFIG_DIAG_CHAR=y +# CONFIG_HW_RANDOM_VIRTIO is not set +# CONFIG_BOOTCONFIG_PARTITION is not set +# CONFIG_CRYPTO_DEV_QCEDEV is not set +# CONFIG_CRYPTO_DEV_QCRYPTO is not set +# CONFIG_MHI_BUS_TEST is not set diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-cig-wf188.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-cig-wf188.dts new file mode 100644 index 0000000000..5f2d4fb03f --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-cig-wf188.dts @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2019, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "../../../arm64/boot/dts/qcom/qcom-ipq6018-cig-wf188.dts" +#include "qcom-ipq6018.dtsi" diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-cig-wf188n.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-cig-wf188n.dts new file mode 100644 index 0000000000..ffd7899228 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-cig-wf188n.dts @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2019, The Linux Foundation. All rights reserved. 
+ * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "../../../arm64/boot/dts/qcom/qcom-ipq6018-cig-wf188n.dts" +#include "qcom-ipq6018.dtsi" diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-edgecore-eap101.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-edgecore-eap101.dts new file mode 100644 index 0000000000..531510a764 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-edgecore-eap101.dts @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2019, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "../../../arm64/boot/dts/qcom/qcom-ipq6018-edgecore-eap101.dts" +#include "qcom-ipq6018.dtsi" diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-miwifi-ax1800.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-miwifi-ax1800.dts new file mode 100644 index 0000000000..5c452c5741 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-miwifi-ax1800.dts @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2019, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "../../../arm64/boot/dts/qcom/qcom-ipq6018-miwifi-ax1800.dts" +#include "qcom-ipq6018.dtsi" diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-wallys-dr6018.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-wallys-dr6018.dts new file mode 100644 index 0000000000..1250eb19f8 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq6018-wallys-dr6018.dts @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "../../../arm64/boot/dts/qcom/qcom-ipq6018-wallys-dr6018.dts" +#include "qcom-ipq6018.dtsi" diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-eap102.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-eap102.dts new file mode 100644 index 0000000000..d1d3f3755e --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-eap102.dts @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "../../../arm64/boot/dts/qcom/qcom-ipq807x-eap102.dts" + +/ { + soc { + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = ; + }; + }; +}; diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-eap106.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-eap106.dts new file mode 100644 index 0000000000..1527f81a40 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-eap106.dts @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "../../../arm64/boot/dts/qcom/qcom-ipq807x-eap106.dts" + +/ { + soc { + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = ; + }; + }; +}; diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-ex227.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-ex227.dts new file mode 100644 index 0000000000..f3b25e2633 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-ex227.dts @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "../../../arm64/boot/dts/qcom/qcom-ipq807x-ex227.dts" + +/ { + soc { + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = ; + }; + }; +}; diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-ex447.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-ex447.dts new file mode 100644 index 0000000000..7cfd66ba0a --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-ex447.dts @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "../../../arm64/boot/dts/qcom/qcom-ipq807x-ex447.dts" + +/ { + soc { + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = ; + }; + }; +}; diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-sercomm-wallaby.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-sercomm-wallaby.dts new file mode 100644 index 0000000000..e756aa5622 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-sercomm-wallaby.dts @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017, The Linux Foundation. All rights reserved. 
+ * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "../../../arm64/boot/dts/qcom/qcom-ipq807x-sercomm-wallaby.dts" + +/ { + soc { + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = ; + }; + }; +}; diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-wf194c.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-wf194c.dts new file mode 100644 index 0000000000..4274b6c704 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-wf194c.dts @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "../../../arm64/boot/dts/qcom/qcom-ipq807x-wf194c.dts" + +/ { + soc { + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = ; + }; + }; +}; diff --git a/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-wf194c4.dts b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-wf194c4.dts new file mode 100644 index 0000000000..372f29df38 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm/boot/dts/qcom-ipq807x-wf194c4.dts @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "../../../arm64/boot/dts/qcom/qcom-ipq807x-wf194c4.dts" + +/ { + soc { + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = ; + }; + }; +}; diff --git a/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq6018-miwifi-ax1800.dts b/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq6018-miwifi-ax1800.dts new file mode 100755 index 0000000000..ba4a5ef3ca --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq6018-miwifi-ax1800.dts @@ -0,0 +1,419 @@ +/dts-v1/; +/* + * Copyright (c) 2019, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#define __IPQ_MEM_PROFILE_256_MB__ +#include "qcom-ipq6018.dtsi" +#include "qcom-ipq6018-rpm-regulator.dtsi" +#include "qcom-ipq6018-cpr-regulator.dtsi" +#include "qcom-ipq6018-cp-cpu.dtsi" +#include +/ { + #address-cells = <0x2>; + #size-cells = <0x2>; + model = "Qualcomm Technologies, Inc. IPQ6018/AP-CP03-C1"; + compatible = "xiaomi,ax1800", "qcom,ipq6018-cp03", "qcom,ipq6018"; + interrupt-parent = <&intc>; + qcom,msm-id = <0x1A5 0x0>; + + aliases { + /* + * Aliases as required by u-boot + * to patch MAC addresses + */ + ethernet1 = "/soc/dp2"; + ethernet2 = "/soc/dp3"; + ethernet3 = "/soc/dp4"; + ethernet4 = "/soc/dp5"; + }; + + chosen { + bootargs = "console=ttyMSM0,115200,n8 rw init=/init"; + bootargs-append = " swiotlb=1 coherent_pool=2M console=ttyMSM0,115200,n8"; + }; + + /* + * +=========+==============+========================+ + * | | | | + * | Region | Start Offset | Size | + * | | | | + * +--------+--------------+-------------------------+ + * | | | | + * | | | | + * | | | | + * | | | | + * | Linux | 0x41000000 | 139MB | + * | | | | + * | | | | + * | | | | + * +--------+--------------+-------------------------+ + * | TZ App | 0x49B00000 | 6MB | + * +--------+--------------+-------------------------+ + * + * From the available 145 MB for Linux in the first 256 MB, + * we are reserving 6 MB for TZAPP. + * + * Refer arch/arm64/boot/dts/qcom/qcom-ipq6018-memory.dtsi + * for memory layout. 
+ */ + +}; + +&tlmm { + uart_pins: uart_pins { + mux { + pins = "gpio44", "gpio45"; + function = "blsp2_uart"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + spi_0_pins: spi_0_pins { + mux { + pins = "gpio38", "gpio39", "gpio40", "gpio41"; + function = "blsp0_spi"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + qpic_pins: qpic_pins { + data_0 { + pins = "gpio15"; + function = "qpic_pad0"; + drive-strength = <8>; + bias-pull-down; + }; + data_1 { + pins = "gpio12"; + function = "qpic_pad1"; + drive-strength = <8>; + bias-pull-down; + }; + data_2 { + pins = "gpio13"; + function = "qpic_pad2"; + drive-strength = <8>; + bias-pull-down; + }; + data_3 { + pins = "gpio14"; + function = "qpic_pad3"; + drive-strength = <8>; + bias-pull-down; + }; + data_4 { + pins = "gpio5"; + function = "qpic_pad4"; + drive-strength = <8>; + bias-pull-down; + }; + data_5 { + pins = "gpio6"; + function = "qpic_pad5"; + drive-strength = <8>; + bias-pull-down; + }; + data_6 { + pins = "gpio7"; + function = "qpic_pad6"; + drive-strength = <8>; + bias-pull-down; + }; + data_7 { + pins = "gpio8"; + function = "qpic_pad7"; + drive-strength = <8>; + bias-pull-down; + }; + qpic_pad { + pins = "gpio1", "gpio3", "gpio4", + "gpio10", "gpio11", "gpio17"; + function = "qpic_pad"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + button_pins: button_pins { + wps_button { + pins = "gpio19"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + mdio_pins: mdio_pinmux { + mux_0 { + pins = "gpio64"; + function = "mdc"; + drive-strength = <8>; + bias-pull-up; + }; + mux_1 { + pins = "gpio65"; + function = "mdio"; + drive-strength = <8>; + bias-pull-up; + }; + mux_2 { + pins = "gpio75"; + function = "gpio"; + bias-pull-up; + }; + }; + + gpio_keys { + pinctrl-names = "default"; + compatible = "gpio-keys"; + pinctrl-0 = <&button_pins>; + + button@1 { + debounce-interval = <0x3c>; + label = "reset"; + linux,code = ; + linux,input-type = <0x01>; + gpios = <&tlmm 0x13 GPIO_ACTIVE_LOW>; + }; + }; + leds_pins: leds_pins { + led_blue { + pins = "gpio31"; + bias-pull-down; + function = "gpio"; + drive-strength = <0x08>; + }; + + net_blue { + pins = "gpio29"; + bias-pull-down; + function = "gpio"; + drive-strength = <0x08>; + }; + + net_yellow { + pins = "gpio30"; + bias-pull-down; + function = "gpio"; + drive-strength = <0x08>; + }; + + led_yellow { + pins = "gpio32"; + bias-pull-down; + function = "gpio"; + drive-strength = <0x08>; + }; + }; +}; + +&soc { + mdio@90000 { + pinctrl-0 = <&mdio_pins>; + pinctrl-names = "default"; + phy-reset-gpio = <&tlmm 75 0>; + status = "ok"; + ethernet-phy@0 { + reg = <0x00>; + }; + + ethernet-phy@1 { + reg = <0x01>; + }; + + ethernet-phy@4 { + reg = <0x04>; + }; + + ethernet-phy@2 { + reg = <0x02>; + }; + + ethernet-phy@3 { + reg = <0x03>; + }; + }; + + ess-switch@3a000000 { + switch_cpu_bmp = <0x1>; /* cpu port bitmap */ + switch_lan_bmp = <0x1e>; /* lan port bitmap */ + switch_wan_bmp = <0x20>; /* wan port bitmap */ + switch_inner_bmp = <0xc0>; /*inner port bitmap*/ + switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ + switch_mac_mode1 = <0xff>; /* mac mode for uniphy instance1*/ + switch_mac_mode2 = <0xff>; /* mac mode for uniphy instance2*/ + qcom,port_phyinfo { + port@0 { + port_id = <0x01>; + phy_address = <0x00>; + }; + + port@4 { + port_id = <0x05>; + phy_address = <0x04>; + }; + + port@2 { + port_id = <0x03>; + phy_address = <0x02>; + }; + + port@1 { + port_id = <0x02>; + phy_address = <0x01>; + }; + + port@3 { + port_id = <0x04>; + 
phy_address = <0x03>; + }; + + }; + }; + + dp2 { + reg = <0x3a001200 0x200>; + qcom,id = <0x02>; + qcom,phy-mdio-addr = <0x01>; + qcom,link-poll = <0x01>; + qcom,mactype = <0x00>; + compatible = "qcom,nss-dp"; + phy-mode = "sgmii"; + local-mac-address = [000000000000]; + device_type = "network"; + }; + + dp3 { + reg = <0x3a001400 0x200>; + qcom,id = <0x03>; + qcom,phy-mdio-addr = <0x02>; + qcom,link-poll = <0x01>; + qcom,mactype = <0x00>; + compatible = "qcom,nss-dp"; + phy-mode = "sgmii"; + local-mac-address = [000000000000]; + device_type = "network"; + }; + + dp4 { + reg = <0x3a001600 0x200>; + qcom,id = <0x04>; + qcom,phy-mdio-addr = <0x03>; + qcom,link-poll = <0x01>; + qcom,mactype = <0x00>; + compatible = "qcom,nss-dp"; + phy-mode = "sgmii"; + local-mac-address = [000000000000]; + device_type = "network"; + }; + + dp5 { + reg = <0x3a001800 0x200>; + qcom,id = <0x05>; + qcom,phy-mdio-addr = <0x04>; + qcom,link-poll = <0x01>; + qcom,mactype = <0x00>; + compatible = "qcom,nss-dp"; + phy-mode = "sgmii"; + local-mac-address = [8c 53 c3 b6 7d ac]; + device_type = "network"; + }; + + leds { + compatible = "gpio-leds"; + pinctrl-0 = <&leds_pins>; + pinctrl-names = "default"; + + led_sys_yellow { + label = "yellow:status"; + default-state = "on"; + gpio = <&tlmm 0x1e GPIO_ACTIVE_HIGH>; + }; + + led_net_yellow { + label = "yellow:net"; + default-state = "off"; + gpio = <&tlmm 0x20 GPIO_ACTIVE_HIGH>; + }; + + led_sys_blue { + label = "blue:status"; + default-state = "off"; + gpio = <&tlmm 0x1d GPIO_ACTIVE_HIGH>; + }; + + led_net_blue { + label = "blue:net"; + default-state = "off"; + gpio = <&tlmm 0x1f GPIO_ACTIVE_HIGH>; + }; + }; +}; + +&blsp1_uart3 { + pinctrl-0 = <&uart_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&spi_0 { + pinctrl-0 = <&spi_0_pins>; + pinctrl-names = "default"; + cs-select = <0>; + status = "ok"; + + m25p80@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0>; + compatible = "n25q128a11"; + linux,modalias = "m25p80", "n25q128a11"; + spi-max-frequency = <50000000>; + use-default-sizes; + }; +}; + +&qpic_bam { + status = "ok"; +}; + +&nand { + pinctrl-0 = <&qpic_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&ssphy_0 { + status = "ok"; +}; + +&qusb_phy_0 { + status = "ok"; +}; + +&usb3 { + status = "ok"; +}; + +&nss_crypto { + status = "ok"; +}; + +&q6_region { + reg = <0x0 0x4ab00000 0x0 0x05500000>; +}; diff --git a/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq6018-wallys-dr6018.dts b/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq6018-wallys-dr6018.dts new file mode 100755 index 0000000000..12065b62f6 --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq6018-wallys-dr6018.dts @@ -0,0 +1,441 @@ +/dts-v1/; +/* + * Copyright (c) 2019, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "qcom-ipq6018.dtsi" +#include "qcom-ipq6018-rpm-regulator.dtsi" +#include "qcom-ipq6018-cpr-regulator.dtsi" +#include "qcom-ipq6018-cp-cpu.dtsi" +#include +#include + +/ { + #address-cells = <0x2>; + #size-cells = <0x2>; + model = "Wallys DR6018"; + compatible = "wallys,dr6018", "qcom,ipq6018-cp01", "qcom,ipq6018"; + interrupt-parent = <&intc>; + + aliases { + serial0 = &blsp1_uart3; + serial1 = &blsp1_uart2; + + /* + * Aliases as required by u-boot + * to patch MAC addresses + */ + ethernet0 = "/soc/dp1"; + ethernet1 = "/soc/dp2"; + + led-boot = &led_power; + led-failsafe = &led_power; + led-running = &led_power; + led-upgrade = &led_power; + }; + + chosen { + bootargs = "console=ttyMSM0,115200,n8 rw init=/init"; + bootargs-append = " console=ttyMSM0,115200,n8 swiotlb=1 coherent_pool=2M"; + }; + + /* + * +=========+==============+========================+ + * | | | | + * | Region | Start Offset | Size | + * | | | | + * +--------+--------------+-------------------------+ + * | | | | + * | | | | + * | | | | + * | | | | + * | Linux | 0x41000000 | 139MB | + * | | | | + * | | | | + * | | | | + * +--------+--------------+-------------------------+ + * | TZ App | 0x49B00000 | 6MB | + * +--------+--------------+-------------------------+ + * + * From the available 145 MB for Linux in the first 256 MB, + * we are reserving 6 MB for TZAPP. + * + * Refer arch/arm64/boot/dts/qcom/qcom-ipq6018-memory.dtsi + * for memory layout. 
+ */ + +/* TZAPP is enabled only in default memory profile */ +#if !defined(__IPQ_MEM_PROFILE_256_MB__) && !defined(__IPQ_MEM_PROFILE_512_MB__) + reserved-memory { + tzapp:tzapp@49B00000 { /* TZAPPS */ + no-map; + reg = <0x0 0x49B00000 0x0 0x00600000>; + }; + }; +#endif +}; + +&tlmm { + uart_pins: uart_pins { + mux { + pins = "gpio44", "gpio45"; + function = "blsp2_uart"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + spi_0_pins: spi_0_pins { + mux { + pins = "gpio38", "gpio39", "gpio40", "gpio41"; + function = "blsp0_spi"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + qpic_pins: qpic_pins { + data_0 { + pins = "gpio15"; + function = "qpic_pad0"; + drive-strength = <8>; + bias-pull-down; + }; + data_1 { + pins = "gpio12"; + function = "qpic_pad1"; + drive-strength = <8>; + bias-pull-down; + }; + data_2 { + pins = "gpio13"; + function = "qpic_pad2"; + drive-strength = <8>; + bias-pull-down; + }; + data_3 { + pins = "gpio14"; + function = "qpic_pad3"; + drive-strength = <8>; + bias-pull-down; + }; + data_4 { + pins = "gpio5"; + function = "qpic_pad4"; + drive-strength = <8>; + bias-pull-down; + }; + data_5 { + pins = "gpio6"; + function = "qpic_pad5"; + drive-strength = <8>; + bias-pull-down; + }; + data_6 { + pins = "gpio7"; + function = "qpic_pad6"; + drive-strength = <8>; + bias-pull-down; + }; + data_7 { + pins = "gpio8"; + function = "qpic_pad7"; + drive-strength = <8>; + bias-pull-down; + }; + qpic_pad { + pins = "gpio1", "gpio3", "gpio4", + "gpio10", "gpio11", "gpio17"; + function = "qpic_pad"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + extcon_usb_pins: extcon_usb_pins { + mux { + pins = "gpio26"; + function = "gpio"; + drive-strength = <2>; + bias-pull-down; + }; + }; + + button_pins: button_pins { + wps_button { + pins = "gpio19"; + function = "gpio"; + drive-strength = <8>; + bias-pull-up; + }; + }; + + mdio_pins: mdio_pinmux { + mux_0 { + pins = "gpio64"; + function = "mdc"; + drive-strength = <8>; + bias-pull-up; + }; + mux_1 { + pins = "gpio65"; + function = "mdio"; + drive-strength = <8>; + bias-pull-up; + }; + mux_2 { + pins = "gpio75"; + function = "gpio"; + bias-pull-up; + }; + }; + + leds_pins: leds_pins { + led_pwr { + pins = "gpio74"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + led_5g { + pins = "gpio35"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + led_2g { + pins = "gpio37"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + }; + uart2_pins: uart2_pins { + mux { + pins = "gpio57", "gpio58"; + function = "blsp4_uart"; + drive-strength = <8>; + bias-pull-down; + }; + }; +}; + +&soc { + extcon_usb: extcon_usb { + pinctrl-0 = <&extcon_usb_pins>; + pinctrl-names = "default"; + id-gpio = <&tlmm 26 GPIO_ACTIVE_LOW>; + status = "ok"; + }; + + mdio: mdio@90000 { + pinctrl-0 = <&mdio_pins>; + pinctrl-names = "default"; + phy-reset-gpio = <&tlmm 75 0 &tlmm 77 1>; + status = "ok"; + ethernet-phy@3 { + reg = <0x03>; + }; + + ethernet-phy@4 { + reg = <0x18>; + }; + + ethernet-phy@1 { + reg = <0x01>; + }; + + ethernet-phy@2 { + reg = <0x02>; + }; + + ethernet-phy@0 { + reg = <0x00>; + }; + }; + + dp1 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <1>; + reg = <0x3a001000 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <0>; + phy-mode = "sgmii"; + }; + + dp2 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <2>; + reg = <0x3a001200 0x200>; + qcom,mactype = <0>; + 
local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <1>; + phy-mode = "sgmii"; + }; + + ess-switch@3a000000 { + switch_cpu_bmp = <0x1>; /* cpu port bitmap */ + switch_lan_bmp = <0x4>; /* lan port bitmap */ + switch_wan_bmp = <0x2>; /* wan port bitmap */ + switch_inner_bmp = <0xc0>; /*inner port bitmap*/ + switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ + switch_mac_mode1 = <0xf>; /* mac mode for uniphy instance1*/ + switch_mac_mode2 = <0xff>; /* mac mode for uniphy instance2*/ + qcom,port_phyinfo { + port@1 { + phy_address = <0x01>; + port_id = <0x02>; + }; + + port@0 { + phy_address = <0x00>; + port_id = <0x01>; + }; + + port@2 { + phy_address = <0x02>; + port_id = <0x03>; + }; + + port@3 { + phy_address = <0x03>; + port_id = <0x04>; + }; + + port@4 { + phy_address = <0x18>; + port_id = <0x05>; + port_mac_sel = "QGMAC_PORT"; + }; + }; + }; + + gpio_keys { + compatible = "gpio-keys"; + pinctrl-0 = <&button_pins>; + pinctrl-names = "default"; + + reset { + label = "reset"; + linux,code = ; + gpios = <&tlmm 19 GPIO_ACTIVE_LOW>; + linux,input-type = <1>; + debounce-interval = <60>; + }; + + /* wps { + label = "wps"; + linux,code = <>; + gpios = <&tlmm 9 GPIO_ACTIVE_LOW>; + linux,input-type = <1>; + debounce-interval = <60>; + };*/ + }; + + leds { + compatible = "gpio-leds"; + pinctrl-0 = <&leds_pins>; + pinctrl-names = "default"; + + led@25 { + label = "green:wifi5"; + gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "green:5g"; + default-state = "off"; + }; + led@24 { + label = "green:wifi2"; + gpios = <&tlmm 37 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "green:2g"; + default-state = "off"; + }; + led_power: led@16 { + label = "green:led_pwr"; + gpios = <&tlmm 50 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "green:power"; + default-state = "off"; + }; + }; +}; + +&blsp1_uart3 { + pinctrl-0 = <&uart_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&spi_0 { + pinctrl-0 = <&spi_0_pins>; + pinctrl-names = "default"; + cs-select = <0>; + status = "ok"; + + m25p80@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0>; + compatible = "n25q128a11"; + linux,modalias = "m25p80", "n25q128a11"; + spi-max-frequency = <50000000>; + use-default-sizes; + }; +}; + +&blsp1_uart2 { + pinctrl-0 = <&uart2_pins>; + pinctrl-names = "default"; + dmas = <&blsp_dma 2>, + <&blsp_dma 3>; + dma-names = "tx", "rx"; + status = "ok"; +}; +&qpic_bam { + status = "ok"; +}; + +&nand { + pinctrl-0 = <&qpic_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&ssphy_0 { + status = "ok"; +}; + +&qusb_phy_0 { + status = "ok"; +}; + +&qusb_phy_1 { + status = "ok"; +}; + +&usb2 { + status = "ok"; +}; + +&usb3 { + status = "ok"; +}; + +&nss_crypto { + status = "ok"; +}; diff --git a/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq807x-eap102.dts b/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq807x-eap102.dts new file mode 100755 index 0000000000..d04cb1020c --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq807x-eap102.dts @@ -0,0 +1,918 @@ +/dts-v1/; +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include "qcom-ipq807x-soc.dtsi" +#include "qcom-ipq807x-ac-cpu.dtsi" + +/ { + #address-cells = <0x2>; + #size-cells = <0x2>; + model = "Edgecore EAP102"; + compatible = "edgecore,eap102", "qcom,ipq807x-ac02", "qcom,ipq807x"; + qcom,msm-id = <0x178 0x0>; + interrupt-parent = <&intc>; + qcom,board-id = <0x8 0x0>; + qcom,pmic-id = <0x0 0x0 0x0 0x0>; + + aliases { + /* + * Aliases as required by u-boot + * to patch MAC addresses + */ + ethernet1 = "/soc/dp5"; + ethernet0 = "/soc/dp6"; + + led-boot = &led_power; + led-failsafe = &led_power; + led-running = &led_power; + led-upgrade = &led_power; + }; + + chosen { + bootargs = "console=ttyMSM0,115200,n8 root=/dev/ram0 rw \ + init=/init"; + #ifdef __IPQ_MEM_PROFILE_256_MB__ + bootargs-append = " swiotlb=1"; + #else + bootargs-append = " swiotlb=1 coherent_pool=2M"; + #endif + }; +}; + +&tlmm { + pinctrl-0 = <&btcoex_pins>; + pinctrl-names = "default"; + + btcoex_pins: btcoex_pins { + mux_0 { + pins = "gpio64"; + function = "pta1_1"; + drive-strength = <6>; + bias-pull-down; + }; + mux_1 { + pins = "gpio65"; + function = "pta1_2"; + drive-strength = <6>; + bias-pull-down; + }; + }; + + mdio_pins: mdio_pinmux { + mux_0 { + pins = "gpio68"; + function = "mdc"; + drive-strength = <8>; + bias-pull-up; + }; + mux_1 { + pins = "gpio69"; + function = "mdio"; + drive-strength = <8>; + bias-pull-up; + }; + mux_2 { + pins = "gpio33"; + function = "gpio"; + bias-pull-up; + }; + mux_3 { + pins = "gpio44"; + function = "gpio"; + bias-pull-up; + }; + }; + + uart_pins: uart_pins { + mux { + pins = "gpio23", "gpio24"; + function = "blsp4_uart1"; + drive-strength = <8>; + bias-disable; + }; + }; + + spi_0_pins: spi_0_pins { + mux { + pins = "gpio38", "gpio39", "gpio40", "gpio41"; + function = "blsp0_spi"; + drive-strength = <8>; + bias-disable; + }; + }; + + qpic_pins: qpic_pins { + data_0 { + pins = "gpio15"; + function = "qpic_pad0"; + drive-strength = <8>; + bias-disable; + }; + data_1 { + pins = "gpio12"; + function = "qpic_pad1"; + drive-strength = <8>; + bias-disable; + }; + data_2 { + pins = "gpio13"; + function = "qpic_pad2"; + drive-strength = <8>; + bias-disable; + }; + data_3 { + pins = "gpio14"; + function = "qpic_pad3"; + drive-strength = <8>; + bias-disable; + }; + data_4 { + pins = "gpio5"; + function = "qpic_pad4"; + drive-strength = <8>; + bias-disable; + }; + data_5 { + pins = "gpio6"; + function = "qpic_pad5"; + drive-strength = <8>; + bias-disable; + }; + data_6 { + pins = "gpio7"; + function = "qpic_pad6"; + drive-strength = <8>; + bias-disable; + }; + data_7 { + pins = "gpio8"; + function = "qpic_pad7"; + drive-strength = <8>; + bias-disable; + }; + qpic_pad { + pins = "gpio1", "gpio3", "gpio4", + "gpio10", "gpio11", "gpio17"; + function = "qpic_pad"; + drive-strength = <8>; + bias-disable; + }; + }; + + hsuart_pins: hsuart_pins { + mux { + pins = "gpio49"; + function = "blsp2_uart"; + drive-strength = <8>; + bias-disable; + }; + }; + + button_pins: button_pins { + + reset_button { + pins = "gpio66"; + function = "gpio"; + 
drive-strength = <8>; + bias-pull-up; + }; + }; + + led_pins: led_pins { + led_pwr { + pins = "gpio46"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + + led_2g { + pins = "gpio47"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + + led_5g { + pins = "gpio48"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + + led_bt { + pins = "gpio50"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + usb_mux_sel_pins: usb_mux_pins { + mux { + pins = "gpio27"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + pcie0_pins: pcie_pins { + pcie0_rst { + pins = "gpio58"; + function = "pcie0_rst"; + drive-strength = <8>; + bias-pull-down; + }; + pcie0_wake { + pins = "gpio59"; + function = "pcie0_wake"; + drive-strength = <8>; + bias-pull-down; + }; + }; + +}; + +&soc { + gpio_keys { + compatible = "gpio-keys"; + pinctrl-0 = <&button_pins>; + pinctrl-names = "default"; + + button@1 { + label = "reset_button"; + linux,code = ; + gpios = <&tlmm 66 GPIO_ACTIVE_LOW>; + linux,input-type = <1>; + debounce-interval = <60>; + }; + }; + + mdio: mdio@90000 { + pinctrl-0 = <&mdio_pins>; + pinctrl-names = "default"; + phy-reset-gpio = <&tlmm 37 0 &tlmm 25 1 &tlmm 44 1>; + compatible = "qcom,ipq40xx-mdio", "qcom,qca-mdio"; + phy0: ethernet-phy@0 { + reg = <0>; + }; + phy1: ethernet-phy@1 { + reg = <1>; + }; + phy2: ethernet-phy@2 { + reg = <2>; + }; + phy3: ethernet-phy@3 { + reg = <3>; + }; + phy4: ethernet-phy@4 { + reg = <24>; + }; + phy5: ethernet-phy@5 { + reg = <28>; + }; + }; + + ess-switch@3a000000 { + switch_cpu_bmp = <0x1>; /* cpu port bitmap */ + switch_lan_bmp = <0x3e>; /* lan port bitmap */ + switch_wan_bmp = <0x40>; /* wan port bitmap */ + switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ + switch_mac_mode1 = <0xf>; /* mac mode for uniphy instance1*/ + switch_mac_mode2 = <0xf>; /* mac mode for uniphy instance2*/ + bm_tick_mode = <0>; /* bm tick mode */ + tm_tick_mode = <0>; /* tm tick mode */ + qcom,port_phyinfo { + port@0 { + port_id = <1>; + phy_address = <0>; + }; + port@1 { + port_id = <2>; + phy_address = <1>; + }; + port@2 { + port_id = <3>; + phy_address = <2>; + }; + port@3 { + port_id = <4>; + phy_address = <3>; + }; + port@4 { + port_id = <5>; + phy_address = <24>; + port_mac_sel = "QGMAC_PORT"; + }; + port@5 { + port_id = <6>; + phy_address = <28>; + port_mac_sel = "QGMAC_PORT"; + }; + }; + port_scheduler_resource { + port@0 { + port_id = <0>; + ucast_queue = <0 143>; + mcast_queue = <256 271>; + l0sp = <0 35>; + l0cdrr = <0 47>; + l0edrr = <0 47>; + l1cdrr = <0 7>; + l1edrr = <0 7>; + }; + port@1 { + port_id = <1>; + ucast_queue = <144 159>; + mcast_queue = <272 275>; + l0sp = <36 39>; + l0cdrr = <48 63>; + l0edrr = <48 63>; + l1cdrr = <8 11>; + l1edrr = <8 11>; + }; + port@2 { + port_id = <2>; + ucast_queue = <160 175>; + mcast_queue = <276 279>; + l0sp = <40 43>; + l0cdrr = <64 79>; + l0edrr = <64 79>; + l1cdrr = <12 15>; + l1edrr = <12 15>; + }; + port@3 { + port_id = <3>; + ucast_queue = <176 191>; + mcast_queue = <280 283>; + l0sp = <44 47>; + l0cdrr = <80 95>; + l0edrr = <80 95>; + l1cdrr = <16 19>; + l1edrr = <16 19>; + }; + port@4 { + port_id = <4>; + ucast_queue = <192 207>; + mcast_queue = <284 287>; + l0sp = <48 51>; + l0cdrr = <96 111>; + l0edrr = <96 111>; + l1cdrr = <20 23>; + l1edrr = <20 23>; + }; + port@5 { + port_id = <5>; + ucast_queue = <208 223>; + mcast_queue = <288 291>; + l0sp = <52 55>; + l0cdrr = <112 127>; + l0edrr = <112 127>; + l1cdrr = <24 
27>; + l1edrr = <24 27>; + }; + port@6 { + port_id = <6>; + ucast_queue = <224 239>; + mcast_queue = <292 295>; + l0sp = <56 59>; + l0cdrr = <128 143>; + l0edrr = <128 143>; + l1cdrr = <28 31>; + l1edrr = <28 31>; + }; + port@7 { + port_id = <7>; + ucast_queue = <240 255>; + mcast_queue = <296 299>; + l0sp = <60 63>; + l0cdrr = <144 159>; + l0edrr = <144 159>; + l1cdrr = <32 35>; + l1edrr = <32 35>; + }; + }; + port_scheduler_config { + port@0 { + port_id = <0>; + l1scheduler { + group@0 { + sp = <0 1>; /*L0 SPs*/ + /*cpri cdrr epri edrr*/ + cfg = <0 0 0 0>; + }; + }; + l0scheduler { + group@0 { + /*unicast queues*/ + ucast_queue = <0 4 8>; + /*multicast queues*/ + mcast_queue = <256 260>; + /*sp cpri cdrr epri edrr*/ + cfg = <0 0 0 0 0>; + }; + group@1 { + ucast_queue = <1 5 9>; + mcast_queue = <257 261>; + cfg = <0 1 1 1 1>; + }; + group@2 { + ucast_queue = <2 6 10>; + mcast_queue = <258 262>; + cfg = <0 2 2 2 2>; + }; + group@3 { + ucast_queue = <3 7 11>; + mcast_queue = <259 263>; + cfg = <0 3 3 3 3>; + }; + }; + }; + port@1 { + port_id = <1>; + l1scheduler { + group@0 { + sp = <36>; + cfg = <0 8 0 8>; + }; + group@1 { + sp = <37>; + cfg = <1 9 1 9>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <144>; + ucast_loop_pri = <16>; + mcast_queue = <272>; + mcast_loop_pri = <4>; + cfg = <36 0 48 0 48>; + }; + }; + }; + port@2 { + port_id = <2>; + l1scheduler { + group@0 { + sp = <40>; + cfg = <0 12 0 12>; + }; + group@1 { + sp = <41>; + cfg = <1 13 1 13>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <160>; + ucast_loop_pri = <16>; + mcast_queue = <276>; + mcast_loop_pri = <4>; + cfg = <40 0 64 0 64>; + }; + }; + }; + port@3 { + port_id = <3>; + l1scheduler { + group@0 { + sp = <44>; + cfg = <0 16 0 16>; + }; + group@1 { + sp = <45>; + cfg = <1 17 1 17>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <176>; + ucast_loop_pri = <16>; + mcast_queue = <280>; + mcast_loop_pri = <4>; + cfg = <44 0 80 0 80>; + }; + }; + }; + port@4 { + port_id = <4>; + l1scheduler { + group@0 { + sp = <48>; + cfg = <0 20 0 20>; + }; + group@1 { + sp = <49>; + cfg = <1 21 1 21>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <192>; + ucast_loop_pri = <16>; + mcast_queue = <284>; + mcast_loop_pri = <4>; + cfg = <48 0 96 0 96>; + }; + }; + }; + port@5 { + port_id = <5>; + l1scheduler { + group@0 { + sp = <52>; + cfg = <0 24 0 24>; + }; + group@1 { + sp = <53>; + cfg = <1 25 1 25>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <208>; + ucast_loop_pri = <16>; + mcast_queue = <288>; + mcast_loop_pri = <4>; + cfg = <52 0 112 0 112>; + }; + }; + }; + port@6 { + port_id = <6>; + l1scheduler { + group@0 { + sp = <56>; + cfg = <0 28 0 28>; + }; + group@1 { + sp = <57>; + cfg = <1 29 1 29>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <224>; + ucast_loop_pri = <16>; + mcast_queue = <292>; + mcast_loop_pri = <4>; + cfg = <56 0 128 0 128>; + }; + }; + }; + port@7 { + port_id = <7>; + l1scheduler { + group@0 { + sp = <60>; + cfg = <0 32 0 32>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <240>; + mcast_queue = <296>; + cfg = <60 0 144 0 144>; + }; + }; + }; + }; + }; +/* + dp1 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <1>; + reg = <0x3a001000 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <0>; + phy-mode = "sgmii"; + }; + + dp2 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <2>; + reg = <0x3a001200 0x200>; + qcom,mactype = <0>; + local-mac-address = 
[000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <1>; + phy-mode = "sgmii"; + }; + + dp3 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <3>; + reg = <0x3a001400 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <2>; + phy-mode = "sgmii"; + }; + + dp4 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <4>; + reg = <0x3a001600 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <3>; + phy-mode = "sgmii"; + }; +*/ + dp6 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <6>; + reg = <0x3a001800 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <28>; + phy-mode = "sgmii"; + }; + + dp5 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <5>; + reg = <0x3a001a00 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <24>; + phy-mode = "sgmii"; + }; + + leds { + compatible = "gpio-leds"; + pinctrl-0 = <&led_pins>; + pinctrl-names = "default"; + + led_pwr { + label = "green:wan"; + gpios = <&tlmm 46 GPIO_ACTIVE_HIGH>; + default-state = "off"; + linux,default-trigger = "led_pwr"; + }; + + led_2g { + label = "green:wifi2"; + gpio = <&tlmm 47 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + led_5g { + label = "green:wifi5"; + gpio = <&tlmm 48 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + led_power: led_bt { + gpios = <&tlmm 50 GPIO_ACTIVE_HIGH>; + label = "green:power"; + default-state = "on"; + linux,default-trigger = "led_bt"; + }; + }; + nss-macsec0 { + compatible = "qcom,nss-macsec"; + phy_addr = <0x18>; + phy_access_mode = <0>; + mdiobus = <&mdio>; + }; + nss-macsec1 { + compatible = "qcom,nss-macsec"; + phy_addr = <0x1c>; + phy_access_mode = <0>; + mdiobus = <&mdio>; + }; +}; + +&serial_blsp4 { + pinctrl-0 = <&uart_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&spi_0 { /* BLSP1 QUP1 */ + pinctrl-0 = <&spi_0_pins>; + pinctrl-names = "default"; + cs-select = <0>; + status = "ok"; + + m25p80@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0>; + compatible = "n25q128a11"; + linux,modalias = "m25p80", "n25q128a11"; + spi-max-frequency = <50000000>; + use-default-sizes; + }; +}; + +&serial_blsp2 { + pinctrl-0 = <&hsuart_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&nss0 { + qcom,low-frequency = <187200000>; + qcom,mid-frequency = <748800000>; + qcom,max-frequency = <1497600000>; +}; + +&msm_imem { + status = "disabled"; +}; + +&ssphy_0 { + status = "ok"; +}; + +&qusb_phy_0 { + status = "ok"; +}; + +&ssphy_1 { + status = "ok"; +}; + +&qusb_phy_1 { + status = "ok"; +}; + +&usb3_0 { + status = "ok"; +}; + +&usb3_1 { + status = "ok"; +}; + +&cryptobam { + status = "ok"; +}; + +&crypto { + status = "ok"; +}; + +&i2c_0 { + status = "disabled"; +}; + +&i2c_1 { + status = "disabled"; +}; + +&qpic_bam { + status = "ok"; +}; + +&nand { + pinctrl-0 = <&qpic_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&qpic_lcd { + status = "disabled"; +}; + +&qpic_lcd_panel { + status = "disabled"; +}; + +&ledc { + status = "disabled"; +}; + +&pcie0 { + status = "ok"; +}; + +&pcie1 { + status = "disabled"; +}; + +&glink_rpm { + status = "disabled"; +}; + +&apc_cpr { + /* Same CPR configuration as OAK */ + compatible = "qcom,cpr4-ipq817x-apss-regulator"; + + thread@0 { + apc_vreg: regulator { + regulator-min-microvolt = 
<1>; + regulator-max-microvolt = <2>; + qcom,cpr-fuse-corners = <2>; + qcom,cpr-corners = <3>; + qcom,cpr-speed-bin-corners = <3>; + qcom,cpr-corner-fmax-map = <1 3>; + + qcom,cpr-voltage-ceiling = + <840000 904000 944000>; + qcom,cpr-voltage-floor = + <592000 648000 712000>; + qcom,corner-frequencies = + <1017600000 1382400000 1382400000>; + + qcom,cpr-open-loop-voltage-fuse-adjustment-0 = + /* Speed bin 0; CPR rev 0..7 */ + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>; + + qcom,cpr-open-loop-voltage-fuse-adjustment-1 = + /* Speed bin 0; CPR rev 0..7 */ + < 0 0>, + < 0 0>, + < 0 0>, + < 20000 26000>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>; + + qcom,cpr-open-loop-voltage-fuse-adjustment-v2-0 = + /* Speed bin 0; CPR rev 0..7 */ + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>; + + qcom,cpr-open-loop-voltage-fuse-adjustment-v2-1 = + /* Speed bin 0; CPR rev 0..7 */ + < 0 0>, + < 0 7000>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>, + < 0 0>; + + qcom,cpr-floor-to-ceiling-max-range = + < 40000 40000 40000>, + < 40000 40000 40000>, + < 40000 40000 40000>, + < 40000 40000 40000>, + < 40000 40000 40000>, + < 40000 40000 40000>, + < 40000 40000 40000>, + < 40000 40000 40000>; + }; + }; +}; + +&npu_cpr { + status = "disabled"; +}; + +&nss0 { + npu-supply = <&dummy_reg>; + mx-supply = <&dummy_reg>; +}; + +&wifi0 { + qcom,board_id = <0x92>; +}; + +&wifi1 { + qcom,board_id = <0x292>; +}; diff --git a/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq807x-wf194c4.dts b/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq807x-wf194c4.dts new file mode 100644 index 0000000000..4c633fea5c --- /dev/null +++ b/target/linux/ipq807x/files/arch/arm64/boot/dts/qcom/qcom-ipq807x-wf194c4.dts @@ -0,0 +1,942 @@ +/dts-v1/; +/* + * Copyright (c) 2017-2019, The Linux Foundation. All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#include "qcom-ipq807x-soc.dtsi" +#include "qcom-ipq807x-hk-cpu.dtsi" + +/ { + #address-cells = <0x2>; + #size-cells = <0x2>; + model = "CIG WF194c4"; + compatible = "cig,wf194c4", "qcom,ipq807x"; + qcom,msm-id = <0x156 0x0>; + interrupt-parent = <&intc>; + qcom,board-id = <0x8 0x0>; + qcom,pmic-id = <0x0 0x0 0x0 0x0>; + + aliases { + /* + * Aliases as required by u-boot + * to patch MAC addresses + */ + ethernet0 = "/soc/dp1"; + ethernet1 = "/soc/dp2"; + /* ethernet2 = "/soc/dp3"; + ethernet3 = "/soc/dp4"; + ethernet4 = "/soc/dp5"; + ethernet5 = "/soc/dp6"; + */ + }; + + chosen { + bootargs = "console=ttyMSM0,115200,n8 root=/dev/ram0 rw \ + init=/init"; + #ifdef __IPQ_MEM_PROFILE_256_MB__ + bootargs-append = " swiotlb=1"; + #else + bootargs-append = " swiotlb=1 coherent_pool=2M"; + #endif + }; +}; + +&tlmm { + pinctrl-0 = <&btcoex_pins>; + pinctrl-names = "default"; + + btcoex_pins: btcoex_pins { +/* + mux_0 { + pins = "gpio64"; + function = "pta1_1"; + drive-strength = <6>; + bias-pull-down; + }; + mux_1 { + pins = "gpio65"; + function = "pta1_2"; + drive-strength = <6>; + bias-pull-down; + }; + mux_2 { + pins = "gpio66"; + function = "pta1_0"; + drive-strength = <6>; + bias-pull-down; + }; + mux_3 { + pins = "gpio54"; + function = "pta2_0"; + drive-strength = <6>; + bias-pull-down; + }; + mux_4 { + pins = "gpio55"; + function = "pta2_1"; + drive-strength = <6>; + bias-pull-down; + }; + mux_5 { + pins = "gpio56"; + function = "pta2_2"; + drive-strength = <6>; + bias-pull-down; + }; +*/ + mux_0 { + pins = "gpio34"; + function = "gpio"; + drive-strength = <6>; + bias-pull-up; + output-high; + }; + mux_1 { + pins = "gpio62"; + function = "gpio"; + drive-strength = <6>; + bias-pull-up; + output-high; + }; + }; + + mdio_pins: mdio_pinmux { + mux_0 { + pins = "gpio68"; + function = "mdc"; + drive-strength = <8>; + bias-pull-up; + }; + mux_1 { + pins = "gpio69"; + function = "mdio"; + drive-strength = <8>; + bias-pull-up; + }; + }; + + uart_pins: uart_pins { + mux { + pins = "gpio23", "gpio24"; + function = "blsp4_uart1"; + drive-strength = <8>; + bias-disable; + }; + }; + + spi_0_pins: spi_0_pins { + mux { + pins = "gpio38", "gpio39", "gpio40", "gpio41"; + function = "blsp0_spi"; + drive-strength = <8>; + bias-disable; + }; + }; + + /*spi_3_pins: spi_3_pins { + mux { + pins = "gpio50", "gpio52", "gpio53"; + function = "blsp3_spi"; + drive-strength = <8>; + bias-disable; + }; + spi_cs { + pins = "gpio22"; + function = "blsp3_spi2"; + drive-strength = <8>; + bias-disable; + }; + quartz_interrupt { + pins = "gpio47"; + function = "gpio"; + input; + bias-disable; + }; + quartz_reset { + pins = "gpio21"; + function = "gpio"; + output-low; + bias-disable; + }; + };*/ + + qpic_pins: qpic_pins { + data_0 { + pins = "gpio15"; + function = "qpic_pad0"; + drive-strength = <8>; + bias-disable; + }; + data_1 { + pins = "gpio12"; + function = "qpic_pad1"; + drive-strength = <8>; + bias-disable; + }; + data_2 { + pins = "gpio13"; + function = "qpic_pad2"; + drive-strength = <8>; + bias-disable; + }; + data_3 { + pins = "gpio14"; + function = "qpic_pad3"; + drive-strength = <8>; + bias-disable; + }; + data_4 { + pins = "gpio5"; + function = "qpic_pad4"; + drive-strength = <8>; + bias-disable; + }; + data_5 { + pins = "gpio6"; + function = "qpic_pad5"; + drive-strength = <8>; + bias-disable; + }; + data_6 { + pins = "gpio7"; + function = "qpic_pad6"; + drive-strength = <8>; + bias-disable; + }; + data_7 { + pins = "gpio8"; + function = "qpic_pad7"; + drive-strength = <8>; + bias-disable; + }; + qpic_pad 
{ + pins = "gpio1", "gpio3", "gpio4", + "gpio10", "gpio11", "gpio17"; + function = "qpic_pad"; + drive-strength = <8>; + bias-disable; + }; + }; + + hsuart_pins: hsuart_pins { + mux { + pins = "gpio48", "gpio49"; + function = "blsp2_uart"; + drive-strength = <8>; + bias-disable; + }; + }; + + button_pins: button_pins { + wps_button { + pins = "gpio67"; + function = "gpio"; + drive-strength = <8>; + bias-pull-up; + }; + }; + + leds_pins: leds_pinmux { + led1_r { + pins = "gpio54"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + led1_g { + pins = "gpio55"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + led2_r { + pins = "gpio56"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + led2_g { + pins = "gpio64"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + + }; + + /*usb_mux_sel_pins: usb_mux_pins { + mux { + pins = "gpio27"; + function = "gpio"; + drive-strength = <8>; + bias-pull-down; + }; + }; + + pcie0_pins: pcie_pins { + pcie0_rst { + pins = "gpio58"; + function = "pcie0_rst"; + drive-strength = <8>; + bias-pull-down; + }; + pcie0_wake { + pins = "gpio59"; + function = "pcie0_wake"; + drive-strength = <8>; + bias-pull-down; + }; + };*/ + uniphy_pins: uniphy_pinmux { + mux_2 { + pins = "gpio37"; + function = "gpio"; + drive-strength = <8>; + bias-pull-up; + }; + mux_3 { + pins = "gpio44"; + function = "gpio"; + drive-strength = <8>; + bias-pull-up; + }; + }; + +}; + +&soc { + gpio_keys { + compatible = "gpio-keys"; + pinctrl-0 = <&button_pins>; + pinctrl-names = "default"; + + button@1 { + label = "reset"; + linux,code = ; + gpios = <&tlmm 67 GPIO_ACTIVE_LOW>; + linux,input-type = <1>; + debounce-interval = <60>; + }; + }; + + leds { + compatible = "gpio-leds"; + pinctrl-0 = <&leds_pins>; + pinctrl-names = "default"; + status = "ok"; + + led@54 { + label = "red:lan"; + gpios = <&tlmm 54 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "led1_r"; + default-state = "off"; + }; + led@55 { + label = "green:lan"; + gpios = <&tlmm 55 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "led1_g"; + default-state = "off"; + }; + led@56 { + label = "red:wan"; + gpios = <&tlmm 56 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "led2_r"; + default-state = "off"; + }; + led@64 { + label = "green:wan"; + gpios = <&tlmm 64 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "led2_g"; + default-state = "off"; + }; + }; + mdio: mdio@90000 { + pinctrl-0 = <&mdio_pins>; + pinctrl-names = "default"; + phy-reset-gpio = <&tlmm 37 0 &tlmm 44 0>; + phy0: ethernet-phy@0 { + reg = <0x10>; /*<0>*/ + }; + phy1: ethernet-phy@1 { + reg = <0x11>; + }; + phy2: ethernet-phy@2 { + reg = <0x12>; + }; + phy3: ethernet-phy@3 { + reg = <0x13>; + }; + phy4: ethernet-phy@4 { + reg = <0x14>; + }; + phy5: ethernet-phy@5 { + compatible ="ethernet-phy-ieee802.3-c45"; + reg = <0>; + }; + }; + + ess-switch@3a000000 { + pinctrl-0 = <&uniphy_pins>; + pinctrl-names = "default"; + switch_cpu_bmp = <0x1>; /* cpu port bitmap */ + switch_lan_bmp = <0x32>; /*..0x3e lan port bitmap */ + switch_wan_bmp = <0x40>; /* wan port bitmap */ + switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ + switch_mac_mode1 = <0xff>; /* mac mode for uniphy instance1*/ + switch_mac_mode2 = <0xd>; /* mac mode for uniphy instance2*/ + bm_tick_mode = <0>; /* bm tick mode */ + tm_tick_mode = <0>; /* tm tick mode */ + /*qcom,port_phyinfo { + port@0 { + port_id = <1>; + phy_address = <0>; + }; + port@1 { + port_id = <2>; + phy_address = <1>; + }; + port@2 { + port_id = <3>; + phy_address = <2>; + 
}; + port@3 { + port_id = <4>; + phy_address = <3>; + }; + port@4 { + port_id = <5>; + phy_address = <24>; + port_mac_sel = "QGMAC_PORT"; + }; + port@5 { + port_id = <6>; + phy_address = <28>; + port_mac_sel = "QGMAC_PORT"; + }; + };*/ + port_scheduler_resource { + port@0 { + port_id = <0>; + ucast_queue = <0 143>; + mcast_queue = <256 271>; + l0sp = <0 35>; + l0cdrr = <0 47>; + l0edrr = <0 47>; + l1cdrr = <0 7>; + l1edrr = <0 7>; + }; + port@1 { + port_id = <1>; + ucast_queue = <144 159>; + mcast_queue = <272 275>; + l0sp = <36 39>; + l0cdrr = <48 63>; + l0edrr = <48 63>; + l1cdrr = <8 11>; + l1edrr = <8 11>; + }; + port@2 { + port_id = <2>; + ucast_queue = <160 175>; + mcast_queue = <276 279>; + l0sp = <40 43>; + l0cdrr = <64 79>; + l0edrr = <64 79>; + l1cdrr = <12 15>; + l1edrr = <12 15>; + }; + port@3 { + port_id = <3>; + ucast_queue = <176 191>; + mcast_queue = <280 283>; + l0sp = <44 47>; + l0cdrr = <80 95>; + l0edrr = <80 95>; + l1cdrr = <16 19>; + l1edrr = <16 19>; + }; + port@4 { + port_id = <4>; + ucast_queue = <192 207>; + mcast_queue = <284 287>; + l0sp = <48 51>; + l0cdrr = <96 111>; + l0edrr = <96 111>; + l1cdrr = <20 23>; + l1edrr = <20 23>; + }; + port@5 { + port_id = <5>; + ucast_queue = <208 223>; + mcast_queue = <288 291>; + l0sp = <52 55>; + l0cdrr = <112 127>; + l0edrr = <112 127>; + l1cdrr = <24 27>; + l1edrr = <24 27>; + }; + port@6 { + port_id = <6>; + ucast_queue = <224 239>; + mcast_queue = <292 295>; + l0sp = <56 59>; + l0cdrr = <128 143>; + l0edrr = <128 143>; + l1cdrr = <28 31>; + l1edrr = <28 31>; + }; + port@7 { + port_id = <7>; + ucast_queue = <240 255>; + mcast_queue = <296 299>; + l0sp = <60 63>; + l0cdrr = <144 159>; + l0edrr = <144 159>; + l1cdrr = <32 35>; + l1edrr = <32 35>; + }; + }; + port_scheduler_config { + port@0 { + port_id = <0>; + l1scheduler { + group@0 { + sp = <0 1>; /*L0 SPs*/ + /*cpri cdrr epri edrr*/ + cfg = <0 0 0 0>; + }; + }; + l0scheduler { + group@0 { + /*unicast queues*/ + ucast_queue = <0 4 8>; + /*multicast queues*/ + mcast_queue = <256 260>; + /*sp cpri cdrr epri edrr*/ + cfg = <0 0 0 0 0>; + }; + group@1 { + ucast_queue = <1 5 9>; + mcast_queue = <257 261>; + cfg = <0 1 1 1 1>; + }; + group@2 { + ucast_queue = <2 6 10>; + mcast_queue = <258 262>; + cfg = <0 2 2 2 2>; + }; + group@3 { + ucast_queue = <3 7 11>; + mcast_queue = <259 263>; + cfg = <0 3 3 3 3>; + }; + }; + }; + port@1 { + port_id = <1>; + l1scheduler { + group@0 { + sp = <36>; + cfg = <0 8 0 8>; + }; + group@1 { + sp = <37>; + cfg = <1 9 1 9>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <144>; + ucast_loop_pri = <16>; + mcast_queue = <272>; + mcast_loop_pri = <4>; + cfg = <36 0 48 0 48>; + }; + }; + }; + port@2 { + port_id = <2>; + l1scheduler { + group@0 { + sp = <40>; + cfg = <0 12 0 12>; + }; + group@1 { + sp = <41>; + cfg = <1 13 1 13>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <160>; + ucast_loop_pri = <16>; + mcast_queue = <276>; + mcast_loop_pri = <4>; + cfg = <40 0 64 0 64>; + }; + }; + }; + port@3 { + port_id = <3>; + l1scheduler { + group@0 { + sp = <44>; + cfg = <0 16 0 16>; + }; + group@1 { + sp = <45>; + cfg = <1 17 1 17>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <176>; + ucast_loop_pri = <16>; + mcast_queue = <280>; + mcast_loop_pri = <4>; + cfg = <44 0 80 0 80>; + }; + }; + }; + port@4 { + port_id = <4>; + l1scheduler { + group@0 { + sp = <48>; + cfg = <0 20 0 20>; + }; + group@1 { + sp = <49>; + cfg = <1 21 1 21>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <192>; + ucast_loop_pri = <16>; + mcast_queue = 
<284>; + mcast_loop_pri = <4>; + cfg = <48 0 96 0 96>; + }; + }; + }; + port@5 { + port_id = <5>; + l1scheduler { + group@0 { + sp = <52>; + cfg = <0 24 0 24>; + }; + group@1 { + sp = <53>; + cfg = <1 25 1 25>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <208>; + ucast_loop_pri = <16>; + mcast_queue = <288>; + mcast_loop_pri = <4>; + cfg = <52 0 112 0 112>; + }; + }; + }; + port@6 { + port_id = <6>; + l1scheduler { + group@0 { + sp = <56>; + cfg = <0 28 0 28>; + }; + group@1 { + sp = <57>; + cfg = <1 29 1 29>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <224>; + ucast_loop_pri = <16>; + mcast_queue = <292>; + mcast_loop_pri = <4>; + cfg = <56 0 128 0 128>; + }; + }; + }; + port@7 { + port_id = <7>; + l1scheduler { + group@0 { + sp = <60>; + cfg = <0 32 0 32>; + }; + }; + l0scheduler { + group@0 { + ucast_queue = <240>; + mcast_queue = <296>; + cfg = <60 0 144 0 144>; + }; + }; + }; + }; + }; +/* + dp1 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <1>; + reg = <0x3a001000 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <0>; + phy-mode = "sgmii"; + }; + + dp2 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <2>; + reg = <0x3a001200 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <1>; + phy-mode = "sgmii"; + }; + + dp3 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <3>; + reg = <0x3a001400 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <2>; + phy-mode = "sgmii"; + }; + + dp4 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <4>; + reg = <0x3a001600 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <3>; + phy-mode = "sgmii"; + }; + + dp5 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <5>; + reg = <0x3a003000 0x3fff>; + qcom,mactype = <1>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <24>; + phy-mode = "sgmii"; + }; + + dp6 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <6>; + reg = <0x3a007000 0x3fff>; + qcom,mactype = <1>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <28>; + phy-mode = "sgmii"; + }; +*/ + dp1 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <4>; + reg = <0x3a001600 0x200>; + // qcom,id = <1>; + // reg = <0x3a001000 0x200>; + qcom,mactype = <0>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <0x13>; + phy-mode = "sgmii"; + }; + dp2 { + device_type = "network"; + compatible = "qcom,nss-dp"; + qcom,id = <6>; + reg = <0x3a007000 0x3fff>; + qcom,mactype = <1>; + local-mac-address = [000000000000]; + qcom,link-poll = <1>; + qcom,phy-mdio-addr = <0>; + phy-mode = "sgmii"; + }; +/* + leds { + compatible = "gpio-leds"; + pinctrl-0 = <&led_pins>; + pinctrl-names = "default"; + + led_2g { + label = "led_2g"; + gpio = <&tlmm 42 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + led_5g { + label = "led_5g"; + gpio = <&tlmm 43 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + }; + + nss-macsec0 { + compatible = "qcom,nss-macsec"; + phy_addr = <0x18>; + phy_access_mode = <0>; + mdiobus = <&mdio>; + }; + nss-macsec1 { + compatible = "qcom,nss-macsec"; + phy_addr = <0x1c>; + phy_access_mode = <0>; + mdiobus = <&mdio>; + }; +*/ +}; + 
+&serial_blsp4 { + pinctrl-0 = <&uart_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&spi_0 { /* BLSP1 QUP1 */ + pinctrl-0 = <&spi_0_pins>; + pinctrl-names = "default"; + cs-select = <0>; + status = "ok"; + + m25p80@0 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0>; + compatible = "n25q128a11"; + linux,modalias = "m25p80", "n25q128a11"; + spi-max-frequency = <50000000>; + use-default-sizes; + }; +}; +/* +&spi_4 { + pinctrl-0 = <&spi_3_pins>; + pinctrl-names = "default"; + cs-select = <2>; + quartz-reset-gpio = <&tlmm 21 1>; + status = "ok"; + spidev3: spi@3 { + compatible = "qca,spidev"; + reg = <0>; + spi-max-frequency = <24000000>; + }; +};*/ + +&serial_blsp2 { + pinctrl-0 = <&hsuart_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&msm_imem { + status = "disabled"; +}; + +&ssphy_0 { + status = "ok"; +}; + +&qusb_phy_0 { + status = "ok"; +}; + +&ssphy_1 { + status = "ok"; +}; + +&qusb_phy_1 { + status = "ok"; +}; + +&usb3_0 { + status = "ok"; +}; + +&usb3_1 { + status = "ok"; +}; + +&cryptobam { + status = "ok"; +}; + +&crypto { + status = "ok"; +}; + +&i2c_0 { + status = "disabled"; +}; + +&i2c_1 { + status = "disabled"; +}; + +&qpic_bam { + status = "ok"; +}; + +&nand { + pinctrl-0 = <&qpic_pins>; + pinctrl-names = "default"; + status = "ok"; +}; + +&qpic_lcd { + status = "disabled"; +}; + +&qpic_lcd_panel { + status = "disabled"; +}; + +&ledc { + status = "disabled"; +}; + +&pcie0 { + status = "disabled"; +}; + +&pcie1 { + status = "disabled"; +}; + diff --git a/target/linux/ipq807x/image/Makefile b/target/linux/ipq807x/image/Makefile new file mode 100644 index 0000000000..3a219e6d6c --- /dev/null +++ b/target/linux/ipq807x/image/Makefile @@ -0,0 +1,26 @@ +include $(TOPDIR)/rules.mk +include $(INCLUDE_DIR)/image.mk + +IMG_PREFIX:=$(VERSION_DIST_SANITIZED)-$(IMG_PREFIX_VERNUM)$(IMG_PREFIX_VERCODE)$(IMG_PREFIX_EXTRA)$(BOARD) + +# default all platform image(fit) build +define Device/Default + PROFILES = Default $$(DEVICE_NAME) + KERNEL_NAME := zImage + FILESYSTEMS := squashfs + DEVICE_DTS_DIR := $(DTS_DIR) + KERNEL_IN_UBI := 1 + ROOTFSNAME_IN_UBI := ubi_rootfs + BLOCKSIZE := 128k + PAGESIZE := 2048 + IMAGES := sysupgrade.tar nand-factory.bin + IMAGE/sysupgrade.tar := sysupgrade-tar | append-metadata + IMAGE/nand-factory.bin := append-ubi | qsdk-ipq-factory-nand + KERNEL_NAME := Image + KERNEL = kernel-bin | gzip | fit gzip $$(KDIR)/image-$$(firstword $$(DEVICE_DTS)).dtb + KERNEL_INITRAMFS = kernel-bin | gzip | fit gzip $$(KDIR)/image-$$(firstword $$(DEVICE_DTS)).dtb +endef + +include $(SUBTARGET).mk + +$(eval $(call BuildImage)) diff --git a/target/linux/ipq807x/image/ipq50xx.mk b/target/linux/ipq807x/image/ipq50xx.mk new file mode 100644 index 0000000000..f20d54190e --- /dev/null +++ b/target/linux/ipq807x/image/ipq50xx.mk @@ -0,0 +1,10 @@ +KERNEL_LOADADDR := 0x41208000 + +define Device/qcom_mp03_3 + DEVICE_TITLE := Qualcomm Maple 03.3 + DEVICE_DTS := qcom-ipq5018-mp03.3 + SUPPORTED_DEVICES := qcom,ipq5018-mp03.3 + DEVICE_PACKAGES := ath11k-wifi-qcom-ipq5018 + DEVICE_DTS_CONFIG := config@mp03.3 +endef +TARGET_DEVICES += qcom_mp03_3 diff --git a/target/linux/ipq807x/image/ipq60xx.mk b/target/linux/ipq807x/image/ipq60xx.mk new file mode 100644 index 0000000000..e94ab22785 --- /dev/null +++ b/target/linux/ipq807x/image/ipq60xx.mk @@ -0,0 +1,56 @@ +KERNEL_LOADADDR := 0x41008000 + +define Device/cig_wf188 + DEVICE_TITLE := Cigtech WF-188 + DEVICE_DTS := qcom-ipq6018-cig-wf188 + DEVICE_DTS_CONFIG := config@cp03-c1 + SUPPORTED_DEVICES := cig,wf188 + IMAGES := 
sysupgrade.tar + IMAGE/sysupgrade.tar/squashfs := append-rootfs | pad-rootfs | sysupgrade-tar rootfs=$$$$@ | append-metadata + DEVICE_PACKAGES := ath11k-wifi-cig-wf188 uboot-env +endef +TARGET_DEVICES += cig_wf188 + +define Device/cig_wf188n + DEVICE_TITLE := Cigtech WF-188n + DEVICE_DTS := qcom-ipq6018-cig-wf188n + DEVICE_DTS_CONFIG := config@cp03-c1 + SUPPORTED_DEVICES := cig,wf188n + DEVICE_PACKAGES := ath11k-wifi-cig-wf188n uboot-env +endef +TARGET_DEVICES += cig_wf188n + +define Device/edgecore_eap101 + DEVICE_TITLE := EdgeCore EAP101 + DEVICE_DTS := qcom-ipq6018-edgecore-eap101 + DEVICE_DTS_CONFIG := config@cp01-c1 + SUPPORTED_DEVICES := edgecore,eap101 + DEVICE_PACKAGES := ath11k-wifi-edgecore-eap101 uboot-envtools +endef +TARGET_DEVICES += edgecore_eap101 + +define Device/wallys_dr6018 + DEVICE_TITLE := Wallys DR6018 + DEVICE_DTS := qcom-ipq6018-wallys-dr6018 + DEVICE_DTS_CONFIG := config@cp01-c4 + SUPPORTED_DEVICES := wallys,dr6018 + DEVICE_PACKAGES := ath11k-wifi-wallys-dr6018 uboot-envtools +endef +TARGET_DEVICES += wallys_dr6018 + +define Device/qcom_cp01_c1 + DEVICE_TITLE := Qualcomm Cypress C1 + DEVICE_DTS := qcom-ipq6018-cp01-c1 + SUPPORTED_DEVICES := qcom,ipq6018-cp01 + DEVICE_PACKAGES := ath11k-wifi-qcom-ipq6018 +endef +TARGET_DEVICES += qcom_cp01_c1 + +define Device/xiaomi_ax1800 + DEVICE_TITLE := Xiaomi AX1800 + DEVICE_DTS := qcom-ipq6018-miwifi-ax1800 + SUPPORTED_DEVICES := xiaomi,ax1800 + DEVICE_DTS_CONFIG := config@cp03-c1 + DEVICE_PACKAGES := ath11k-wifi-xiaomi-ax1800 +endef +TARGET_DEVICES += xiaomi_ax1800 diff --git a/target/linux/ipq807x/image/ipq807x.mk b/target/linux/ipq807x/image/ipq807x.mk new file mode 100644 index 0000000000..1785478ea4 --- /dev/null +++ b/target/linux/ipq807x/image/ipq807x.mk @@ -0,0 +1,90 @@ +KERNEL_LOADADDR := 0x41208000 + +define Device/qcom_hk01 + DEVICE_TITLE := Qualcomm Hawkeye HK01 + DEVICE_DTS := qcom-ipq807x-hk01 + DEVICE_DTS_CONFIG=config@hk01 + SUPPORTED_DEVICES := qcom,ipq807x-hk01 + DEVICE_PACKAGES := ath11k-wifi-qcom-ipq8074 +endef +TARGET_DEVICES += qcom_hk01 + +define Device/qcom_hk14 + DEVICE_TITLE := Qualcomm Hawkeye HK14 + DEVICE_DTS := qcom-ipq807x-hk14 + DEVICE_DTS_CONFIG=config@hk14 + SUPPORTED_DEVICES := qcom,ipq807x-hk14 + DEVICE_PACKAGES := ath11k-wifi-qcom-ipq8074 kmod-ath11k-pci ath11k-firmware-qcn9000 +endef +TARGET_DEVICES += qcom_hk14 + +define Device/sercomm_wallaby + DEVICE_TITLE := Sercomm Kiwi + DEVICE_DTS := qcom-ipq807x-sercomm-wallaby + DEVICE_DTS_CONFIG=config@hk09 + SUPPORTED_DEVICES := sercomm,wallaby + DEVICE_PACKAGES := ath11k-wifi-sercomm-wallaby +endef +TARGET_DEVICES += sercomm_wallaby + +define Device/cig_wf194 + DEVICE_TITLE := CIG WF194C + DEVICE_DTS := qcom-ipq807x-wf194c + DEVICE_DTS_CONFIG=config@hk01 + SUPPORTED_DEVICES := cig,wf194c + DEVICE_PACKAGES := ath11k-wifi-cig-wf194c aq-fw-download uboot-envtools kmod-usb3 kmod-usb2 +endef +TARGET_DEVICES += cig_wf194 + +define Device/cig_wf194c4 + DEVICE_TITLE := CIG WF194C4 + DEVICE_DTS := qcom-ipq807x-wf194c4 + DEVICE_DTS_CONFIG=config@hk09 + SUPPORTED_DEVICES := cig,wf194c4 + DEVICE_PACKAGES := ath11k-wifi-cig-wf194c4 aq-fw-download uboot-envtools kmod-usb3 kmod-usb2 +endef +TARGET_DEVICES += cig_wf194c4 + +define Device/edgecore_eap102 + DEVICE_TITLE := Edgecore EAP102 + DEVICE_DTS := qcom-ipq807x-eap102 + DEVICE_DTS_CONFIG=config@ac02 + SUPPORTED_DEVICES := edgecore,eap102 + DEVICE_PACKAGES := ath11k-wifi-edgecore-eap102 kmod-usb3 uboot-envtools +endef +TARGET_DEVICES += edgecore_eap102 + +define Device/edgecore_eap106 + DEVICE_TITLE := 
Edgecore EAP106
+  DEVICE_DTS := qcom-ipq807x-eap106
+  DEVICE_DTS_CONFIG=config@hk02
+  SUPPORTED_DEVICES := edgecore,eap106
+  DEVICE_PACKAGES := ath11k-wifi-edgecore-eap106 iaq-fw-download kmod-usb2 kmod-usb3 uboot-envtools
+endef
+TARGET_DEVICES += edgecore_eap106
+
+define Device/tplink_ex227
+  DEVICE_TITLE := TP-Link EX227
+  DEVICE_DTS := qcom-ipq807x-ex227
+  DEVICE_DTS_CONFIG=config@hk07
+  SUPPORTED_DEVICES := tplink,ex227
+  DEVICE_PACKAGES := ath11k-wifi-tplink-ex227
+  IMAGES := sysupgrade.tar nand-factory.bin nand-factory.ubi
+  IMAGE/sysupgrade.tar := sysupgrade-tar | append-metadata
+  IMAGE/nand-factory.bin := append-ubi | qsdk-ipq-factory-nand
+  IMAGE/nand-factory.ubi := append-ubi
+endef
+TARGET_DEVICES += tplink_ex227
+
+define Device/tplink_ex447
+  DEVICE_TITLE := TP-Link EX447
+  DEVICE_DTS := qcom-ipq807x-ex447
+  DEVICE_DTS_CONFIG=config@hk09
+  SUPPORTED_DEVICES := tplink,ex447
+  DEVICE_PACKAGES := ath11k-wifi-tplink-ex447
+  IMAGES := sysupgrade.tar nand-factory.bin nand-factory.ubi
+  IMAGE/sysupgrade.tar := sysupgrade-tar | append-metadata
+  IMAGE/nand-factory.bin := append-ubi | qsdk-ipq-factory-nand
+  IMAGE/nand-factory.ubi := append-ubi
+endef
+TARGET_DEVICES += tplink_ex447
diff --git a/target/linux/ipq807x/ipq50xx/config-default b/target/linux/ipq807x/ipq50xx/config-default
new file mode 100644
index 0000000000..b8e202c874
--- /dev/null
+++ b/target/linux/ipq807x/ipq50xx/config-default
@@ -0,0 +1,84 @@
+# CONFIG_AHCI_IPQ is not set
+CONFIG_ARCH_IPQ5018=y
+# CONFIG_DIAGFWD_BRIDGE_CODE is not set
+CONFIG_IPQ_ADSS_5018=y
+CONFIG_IPQ_APSS_5018=y
+CONFIG_IPQ_GCC_5018=y
+# CONFIG_NET_SWITCHDEV is not set
+CONFIG_NUM_ALT_PARTITION=16
+CONFIG_PINCTRL_IPQ5018=y
+# CONFIG_IPC_LOGGING is not set
+CONFIG_IPQ_SUBSYSTEM_DUMP=y
+CONFIG_SPS=y
+CONFIG_SPS_SUPPORT_NDP_BAM=y
+CONFIG_CORESIGHT=y
+CONFIG_CORESIGHT_CSR=y
+CONFIG_CORESIGHT_CTI=y
+CONFIG_CORESIGHT_EVENT=y
+CONFIG_CORESIGHT_HWEVENT=y
+CONFIG_CORESIGHT_LINKS_AND_SINKS=y
+CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y
+CONFIG_CORESIGHT_QCOM_REPLICATOR=y
+# CONFIG_INPUT_PM8941_PWRKEY is not set
+CONFIG_MDIO_QCA=y
+# CONFIG_CRYPTO_ALL_CASES is not set
+CONFIG_CRYPTO_DEV_QCOM_ICE=y
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SHA512=y
+# CONFIG_CORESIGHT_QPDI is not set
+# CONFIG_CORESIGHT_SINK_ETBV10 is not set
+CONFIG_CORESIGHT_SINK_TPIU=y
+# CONFIG_CORESIGHT_SOURCE_DUMMY is not set
+CONFIG_CORESIGHT_SOURCE_ETM3X=y
+CONFIG_CORESIGHT_SOURCE_ETM4X=y
+# CONFIG_CORESIGHT_REMOTE_ETM is not set
+CONFIG_CORESIGHT_STM=y
+CONFIG_CORESIGHT_TPDA=y
+CONFIG_CORESIGHT_TPDM=y
+# CONFIG_CORESIGHT_TPDM_DEFAULT_ENABLE is not set
+CONFIG_IIO=y
+# CONFIG_IIO_BUFFER is not set
+# CONFIG_IIO_TRIGGER is not set
+CONFIG_PCIE_DW_PLAT=y
+CONFIG_PHY_IPQ_UNIPHY_PCIE=y
+CONFIG_VMSPLIT_2G=y
+# CONFIG_VMSPLIT_3G is not set
+CONFIG_PPS=y
+CONFIG_PTP_1588_CLOCK=y
+# CONFIG_DP83640_PHY is not set
+CONFIG_PWM_IPQ5018=y
+CONFIG_QCOM_APM=y
+CONFIG_QCOM_DCC=y
+# CONFIG_QCOM_SPMI_TEMP_ALARM is not set
+CONFIG_MMC_SDHCI_MSM_ICE=y
+CONFIG_USB_BAM=y
+CONFIG_MAILBOX=y
+# CONFIG_USB_QCOM_DIAG_BRIDGE is not set
+# CONFIG_USB_CONFIGFS_F_DIAG is not set
+# CONFIG_NF_IPV6_DUMMY_HEADER is not set
+CONFIG_RMNET_DATA=y
+CONFIG_RMNET_DATA_DEBUG_PKT=y
+CONFIG_MTD_NAND_SERIAL=y
+CONFIG_PAGE_SCOPE_MULTI_PAGE_READ=y
+# CONFIG_RMNET_DATA_FC is not set
+CONFIG_CRYPTO_NO_ZERO_LEN_HASH=y
+CONFIG_CRYPTO_DISABLE_AES192_TEST=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
+# CONFIG_CPU_FREQ_GOV_PERFORMANCE is not set
+# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
+#
CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +CONFIG_QTI_EUD=y +CONFIG_USB_QCA_M31_PHY=y +CONFIG_QGIC2_MSI=y +CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y +CONFIG_PWM_IPQ4019=y +CONFIG_RMNET=y +CONFIG_QCOM_QMI_RMNET=y +CONFIG_QCOM_QMI_DFC=y +CONFIG_QCOM_QMI_POWER_COLLAPSE=y +CONFIG_RMNET_CTL=y +CONFIG_RMNET_CTL_DEBUG=y +CONFIG_SND_SOC_IPQ_LPASS=y +CONFIG_SND_SOC_IPQ_LPASS_PCM_RAW=y +# CONFIG_SND_SOC_IPQ_PCM_RAW is not set diff --git a/target/linux/ipq807x/ipq50xx/config-lowmem b/target/linux/ipq807x/ipq50xx/config-lowmem new file mode 100644 index 0000000000..b1b817ef6f --- /dev/null +++ b/target/linux/ipq807x/ipq50xx/config-lowmem @@ -0,0 +1,73 @@ +# CONFIG_AHCI_IPQ is not set +CONFIG_ARCH_IPQ5018=y +# CONFIG_DIAGFWD_BRIDGE_CODE is not set +CONFIG_IPQ_ADSS_5018=y +CONFIG_IPQ_APSS_5018=y +CONFIG_IPQ_GCC_5018=y +# CONFIG_NET_SWITCHDEV is not set +CONFIG_NUM_ALT_PARTITION=16 +CONFIG_PINCTRL_IPQ5018=y +# CONFIG_IPC_LOGGING is not set +CONFIG_IPQ_SUBSYSTEM_DUMP=y +# CONFIG_SPS is not set +# CONFIG_SPS_SUPPORT_NDP_BAM is not set +# CONFIG_CORESIGHT is not set +# CONFIG_INPUT_PM8941_PWRKEY is not set +CONFIG_MDIO_QCA=y +# CONFIG_CRYPTO_ALL_CASES is not set +# CONFIG_CRYPTO_DEV_QCOM_ICE is not set +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_IIO is not set +# CONFIG_IIO_BUFFER is not set +# CONFIG_IIO_TRIGGER is not set +CONFIG_PCIE_DW_PLAT=y +CONFIG_PHY_IPQ_UNIPHY_PCIE=y +CONFIG_VMSPLIT_2G=y +# CONFIG_VMSPLIT_3G is not set +# CONFIG_PPS is not set +# CONFIG_PTP_1588_CLOCK is not set +# CONFIG_DP83640_PHY is not set +CONFIG_PWM_IPQ5018=y +CONFIG_QCOM_APM=y +# CONFIG_QCOM_DCC is not set +# CONFIG_QCOM_SPMI_TEMP_ALARM is not set +CONFIG_MMC_SDHCI_MSM_ICE=y +CONFIG_USB_BAM=y +CONFIG_MAILBOX=y +# CONFIG_USB_QCOM_DIAG_BRIDGE is not set +# CONFIG_USB_CONFIGFS_F_DIAG is not set +# CONFIG_NF_IPV6_DUMMY_HEADER is not set +# CONFIG_RMNET_DATA is not set +# CONFIG_RMNET_DATA_DEBUG_PKT is not set +CONFIG_MTD_NAND_SERIAL=y +CONFIG_PAGE_SCOPE_MULTI_PAGE_READ=y +# CONFIG_RMNET_DATA_FC is not set +# CONFIG_CRYPTO_NO_ZERO_LEN_HASH is not set +# CONFIG_CRYPTO_DISABLE_AES192_TEST is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +CONFIG_QTI_EUD=y +CONFIG_USB_QCA_M31_PHY=y +CONFIG_SQUASHFS_XZ=y +# CONFIG_SQUASHFS_ZLIB is not set +# CONFIG_JFFS2_LZMA is not set +CONFIG_JFFS2_ZLIB=y +# CONFIG_LZO_COMPRESS is not set +# CONFIG_LZO_DECOMPRESS is not set +CONFIG_XZ_DEC=y +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_POWERPC is not set +# CONFIG_XZ_DEC_IA64 is not set +CONFIG_XZ_DEC_ARM=y +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_SPARC is not set +CONFIG_XZ_DEC_BCJ=y +# CONFIG_LZO_COMPRESS is not set +# CONFIG_LZO_DECOMPRESS is not set +# CONFIG_CRYPTO is not set +CONFIG_QGIC2_MSI=y +CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y diff --git a/target/linux/ipq807x/ipq50xx/target.mk b/target/linux/ipq807x/ipq50xx/target.mk new file mode 100644 index 0000000000..649f398ba6 --- /dev/null +++ b/target/linux/ipq807x/ipq50xx/target.mk @@ -0,0 +1,10 @@ + +SUBTARGET:=ipq50xx +BOARDNAME:=IPQ50XX +CPU_TYPE:=cortex-a7 + +DEFAULT_PACKAGES += ath11k-firmware-ipq50xx qca-nss-fw-ipq50xx + +define Target/Description + Build firmware image for IPQ50xx SoC devices. 
+endef diff --git a/target/linux/ipq807x/ipq60xx/config-default b/target/linux/ipq807x/ipq60xx/config-default new file mode 100644 index 0000000000..3d7c59698e --- /dev/null +++ b/target/linux/ipq807x/ipq60xx/config-default @@ -0,0 +1,122 @@ +CONFIG_ALLOC_BUFFERS_IN_4K_CHUNKS=y +CONFIG_ANDROID=y +# CONFIG_ANDROID_BINDER_IPC is not set +# CONFIG_AQ_PHY is not set +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_IPQ6018=y +# CONFIG_ARCH_MSM8X60 is not set +CONFIG_ARM_DMA_IOMMU_ALIGNMENT=8 +CONFIG_ARM_DMA_USE_IOMMU=y +CONFIG_ARM_HEAVY_MB=y +CONFIG_ARM_QTI_IPQ60XX_CPUFREQ=y +CONFIG_ARM_SMMU=y +CONFIG_ASN1=y +CONFIG_ASSOCIATIVE_ARRAY=y +CONFIG_CACHE_L2X0=y +CONFIG_CLZ_TAB=y +CONFIG_CMA=y +CONFIG_CMA_ALIGNMENT=8 +CONFIG_CMA_AREAS=7 +CONFIG_CMA_DEBUG=y +CONFIG_CMA_DEBUGFS=y +CONFIG_CMA_SIZE_MBYTES=0 +CONFIG_CMA_SIZE_SEL_MBYTES=y +CONFIG_CORESIGHT=y +CONFIG_CORESIGHT_CSR=y +CONFIG_CORESIGHT_CTI=y +CONFIG_CORESIGHT_HWEVENT=y +CONFIG_CORESIGHT_LINKS_AND_SINKS=y +CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y +CONFIG_CORESIGHT_QCOM_REPLICATOR=y +CONFIG_CORESIGHT_SINK_TPIU=y +CONFIG_CORESIGHT_SOURCE_ETM3X=y +CONFIG_CORESIGHT_SOURCE_ETM4X=y +CONFIG_CORESIGHT_STM=y +CONFIG_CORESIGHT_TPDA=y +CONFIG_CORESIGHT_TPDM=y +CONFIG_CRC_CCITT=m +CONFIG_CRYPTO_AKCIPHER=y +CONFIG_CRYPTO_AKCIPHER2=y +CONFIG_CRYPTO_DRBG=y +CONFIG_CRYPTO_DRBG_HMAC=y +CONFIG_CRYPTO_DRBG_MENU=y +CONFIG_CRYPTO_GF128MUL=y +CONFIG_CRYPTO_GHASH=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_JITTERENTROPY=y +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_NULL2=y +CONFIG_CRYPTO_PCOMP2=y +CONFIG_CRYPTO_RNG_DEFAULT=y +CONFIG_CRYPTO_SHA512=y +CONFIG_DMA_CMA=y +CONFIG_DMA_SHARED_BUFFER=y +CONFIG_DT_IDLE_STATES=y +CONFIG_EDAC_ATOMIC_SCRUB=y +CONFIG_EDAC_SUPPORT=y +CONFIG_EXTCON=y +CONFIG_EXTCON_USB_GPIO=y +CONFIG_FB_DEFERRED_IO=y +CONFIG_FREEZER=y +CONFIG_GPIO_WATCHDOG=y +CONFIG_GPIO_WATCHDOG_ARCH_INITCALL=y +CONFIG_IOMMU_API=y +CONFIG_IOMMU_DMA=y +CONFIG_IOMMU_IOVA=y +CONFIG_IOMMU_IO_PGTABLE=y +# CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set +CONFIG_IOMMU_IO_PGTABLE_LPAE=y +# CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set +CONFIG_IOMMU_SUPPORT=y +CONFIG_ION=y +CONFIG_ION_MSM=y +CONFIG_IPQ_APSS_6018=y +CONFIG_IPQ_GCC_6018=y +CONFIG_IPQ_MEM_PROFILE=256 +CONFIG_KASAN_SHADOW_OFFSET=0x5f000000 +CONFIG_LEDS_PCA9956B=y +CONFIG_MDIO_QCA=y +CONFIG_MEMORY_ISOLATION=y +CONFIG_MIGRATION=y +CONFIG_MPILIB=y +CONFIG_MSM_SECURE_BUFFER=y +CONFIG_NEED_SG_DMA_LENGTH=y +CONFIG_NET_SWITCHDEV=y +CONFIG_NUM_ALT_PARTITION=16 +CONFIG_OF_IOMMU=y +CONFIG_OID_REGISTRY=y +CONFIG_OUTER_CACHE=y +CONFIG_OUTER_CACHE_SYNC=y +CONFIG_PAGE_OFFSET=0x80000000 +CONFIG_PINCTRL_IPQ6018=y +# CONFIG_PKCS7_MESSAGE_PARSER is not set +# CONFIG_PL310_ERRATA_588369 is not set +# CONFIG_PL310_ERRATA_727915 is not set +# CONFIG_PL310_ERRATA_753970 is not set +# CONFIG_PL310_ERRATA_769419 is not set +CONFIG_PPS=y +CONFIG_PTP_1588_CLOCK=y +CONFIG_PWM_IPQ4019=y +CONFIG_QCOM_APM=y +CONFIG_QCOM_DCC=y +CONFIG_QCOM_QMI_HELPERS=y +CONFIG_REGMAP_I2C=y +CONFIG_REGMAP_SPMI=y +CONFIG_REGULATOR_CPR3=y +CONFIG_REGULATOR_CPR3_NPU=y +CONFIG_REGULATOR_CPR4_APSS=y +# CONFIG_SKB_FIXED_SIZE_2K is not set +CONFIG_SOC_BUS=y +CONFIG_SPS=y +CONFIG_SPS_SUPPORT_NDP_BAM=y +CONFIG_STAGING=y +CONFIG_SUSPEND_FREEZER=y +# CONFIG_USB_GADGET is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +# CONFIG_VFIO is not set +CONFIG_VIRTIO=y +CONFIG_VIRTUALIZATION=y +CONFIG_VMSPLIT_2G=y +# CONFIG_VMSPLIT_3G is not set +CONFIG_WANT_DEV_COREDUMP=y diff --git a/target/linux/ipq807x/ipq60xx/profiles/default.mk 
b/target/linux/ipq807x/ipq60xx/profiles/default.mk new file mode 100644 index 0000000000..f47e73acfc --- /dev/null +++ b/target/linux/ipq807x/ipq60xx/profiles/default.mk @@ -0,0 +1,9 @@ +define Profile/Default + NAME:=Default Profile (minimum package set) +endef + +define Profile/Default/Description +Default package set compatible with most boards. +endef +$(eval $(call Profile,Default)) + diff --git a/target/linux/ipq807x/ipq60xx/target.mk b/target/linux/ipq807x/ipq60xx/target.mk new file mode 100644 index 0000000000..cf2bf7b9cd --- /dev/null +++ b/target/linux/ipq807x/ipq60xx/target.mk @@ -0,0 +1,8 @@ +SUBTARGET:=ipq60xx +BOARDNAME:=IPQ60xx based boards + +DEFAULT_PACKAGES += ath11k-firmware-ipq60xx qca-nss-fw-ipq60xx + +define Target/Description + Build images for IPQ60xx systems. +endef diff --git a/target/linux/ipq807x/ipq807x/config-default b/target/linux/ipq807x/ipq807x/config-default new file mode 100644 index 0000000000..f1e8aadc9c --- /dev/null +++ b/target/linux/ipq807x/ipq807x/config-default @@ -0,0 +1,78 @@ +# CONFIG_AHCI_IPQ is not set +CONFIG_ARCH_IPQ807x=y +# CONFIG_DIAGFWD_BRIDGE_CODE is not set +CONFIG_IPQ_ADSS_807x=y +CONFIG_IPQ_APSS_807x=y +CONFIG_IPQ_GCC_807x=y +CONFIG_NET_SWITCHDEV=y +CONFIG_NUM_ALT_PARTITION=16 +CONFIG_PINCTRL_IPQ807x=y +# CONFIG_IPC_LOGGING is not set +CONFIG_IPQ_SUBSYSTEM_DUMP=y +CONFIG_SPS=y +CONFIG_SPS_SUPPORT_NDP_BAM=y +CONFIG_CORESIGHT=y +CONFIG_CORESIGHT_CSR=y +CONFIG_CORESIGHT_CTI=y +CONFIG_CORESIGHT_EVENT=y +CONFIG_CORESIGHT_HWEVENT=y +CONFIG_CORESIGHT_LINKS_AND_SINKS=y +CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y +CONFIG_CORESIGHT_QCOM_REPLICATOR=y +CONFIG_CORESIGHT_STREAM=m +# CONFIG_INPUT_PM8941_PWRKEY is not set +CONFIG_MDIO_QCA=y +# CONFIG_CRYPTO_ALL_CASES is not set +CONFIG_CRYPTO_DEV_QCOM_ICE=y +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_SHA512=y +# CONFIG_CORESIGHT_QPDI is not set +# CONFIG_CORESIGHT_SINK_ETBV10 is not set +CONFIG_CORESIGHT_SINK_TPIU=y +# CONFIG_CORESIGHT_SOURCE_DUMMY is not set +CONFIG_CORESIGHT_SOURCE_ETM3X=y +CONFIG_CORESIGHT_SOURCE_ETM4X=y +# CONFIG_CORESIGHT_REMOTE_ETM is not set +CONFIG_CORESIGHT_STM=y +CONFIG_CORESIGHT_TPDA=y +CONFIG_CORESIGHT_TPDM=y +CONFIG_AQUANTIA_PHY=y +# CONFIG_CORESIGHT_TPDM_DEFAULT_ENABLE is not set +CONFIG_IIO=y +# CONFIG_IIO_BUFFER is not set +# CONFIG_IIO_TRIGGER is not set +CONFIG_PCIE_DW_PLAT=y +CONFIG_VMSPLIT_2G=y +# CONFIG_VMSPLIT_3G is not set +CONFIG_PPS=y +CONFIG_PTP_1588_CLOCK=y +# CONFIG_DP83640_PHY is not set +CONFIG_PWM_IPQ4019=y +CONFIG_QCOM_APM=y +CONFIG_QCOM_DCC=y +# CONFIG_QCOM_SPMI_TEMP_ALARM is not set +CONFIG_QCOM_SPMI_VADC=y +CONFIG_REGMAP_ALLOW_WRITE_DEBUGFS=y +CONFIG_REGULATOR_CPR3=y +CONFIG_REGULATOR_CPR3_NPU=y +CONFIG_REGULATOR_CPR4_APSS=y +CONFIG_MMC_SDHCI_MSM_ICE=y +CONFIG_USB_BAM=y +CONFIG_USB_QCOM_KS_BRIDGE=m +CONFIG_MAILBOX=y +# CONFIG_USB_QCOM_DIAG_BRIDGE is not set +# CONFIG_USB_CONFIGFS_F_DIAG is not set +# CONFIG_NF_IPV6_DUMMY_HEADER is not set +CONFIG_RMNET=y +CONFIG_RMNET_DATA=y +CONFIG_RMNET_DATA_DEBUG_PKT=y +# CONFIG_RMNET_DATA_FC is not set +CONFIG_QCOM_QMI_RMNET=y +CONFIG_QCOM_QMI_DFC=y +CONFIG_QCOM_QMI_POWER_COLLAPSE=y +CONFIG_RMNET_CTL=y +CONFIG_RMNET_CTL_DEBUG=y +CONFIG_MHI_BUS_TEST=y +CONFIG_MHI_DEBUG=y +CONFIG_MHI_NETDEV=y +CONFIG_MHI_UCI=y diff --git a/target/linux/ipq807x/ipq807x/profiles/default.mk b/target/linux/ipq807x/ipq807x/profiles/default.mk new file mode 100644 index 0000000000..f47e73acfc --- /dev/null +++ b/target/linux/ipq807x/ipq807x/profiles/default.mk @@ -0,0 +1,9 @@ +define Profile/Default + NAME:=Default 
Profile (minimum package set) +endef + +define Profile/Default/Description +Default package set compatible with most boards. +endef +$(eval $(call Profile,Default)) + diff --git a/target/linux/ipq807x/ipq807x/target.mk b/target/linux/ipq807x/ipq807x/target.mk new file mode 100644 index 0000000000..7c24b66e60 --- /dev/null +++ b/target/linux/ipq807x/ipq807x/target.mk @@ -0,0 +1,7 @@ +SUBTARGET:=ipq807x +BOARDNAME:=IPQ807x based boards + +DEFAULT_PACKAGES += ath11k-firmware-ipq807x qca-nss-fw-ipq807x +define Target/Description + Build images for IPQ807x systems. +endef diff --git a/target/linux/ipq807x/modules.mk b/target/linux/ipq807x/modules.mk new file mode 100644 index 0000000000..0223ff2590 --- /dev/null +++ b/target/linux/ipq807x/modules.mk @@ -0,0 +1,61 @@ +define KernelPackage/usb-phy-ipq807x + TITLE:=DWC3 USB QCOM PHY driver for IPQ807x + DEPENDS:=@TARGET_ipq807x + KCONFIG:= \ + CONFIG_USB_QCOM_QUSB_PHY \ + CONFIG_USB_QCOM_QMP_PHY + FILES:= \ + $(LINUX_DIR)/drivers/usb/phy/phy-msm-qusb.ko \ + $(LINUX_DIR)/drivers/usb/phy/phy-msm-ssusb-qmp.ko + AUTOLOAD:=$(call AutoLoad,45,phy-msm-qusb phy-msm-ssusb-qmp,1) + $(call AddDepends/usb) +endef + +define KernelPackage/usb-phy-ipq807x/description + This driver provides support for the USB PHY drivers + within the IPQ807x SoCs. +endef + +$(eval $(call KernelPackage,usb-phy-ipq807x)) + + +define KernelPackage/qrtr_mproc + TITLE:= Ath11k Specific kernel configs for IPQ807x and IPQ60xx + DEPENDS+= @TARGET_ipq807x + KCONFIG:= \ + CONFIG_QRTR=y \ + CONFIG_QRTR_MHI=y \ + CONFIG_MHI_BUS=y \ + CONFIG_MHI_QTI=y \ + CONFIG_QCOM_APCS_IPC=y \ + CONFIG_QCOM_GLINK_SSR=y \ + CONFIG_QCOM_Q6V5_WCSS=y \ + CONFIG_MSM_RPM_RPMSG=y \ + CONFIG_RPMSG_QCOM_GLINK_RPM=y \ + CONFIG_REGULATOR_RPM_GLINK=y \ + CONFIG_QCOM_SYSMON=y \ + CONFIG_RPMSG=y \ + CONFIG_RPMSG_CHAR=y \ + CONFIG_RPMSG_QCOM_GLINK_SMEM=y \ + CONFIG_RPMSG_QCOM_SMD=y \ + CONFIG_QRTR_SMD=y \ + CONFIG_QCOM_QMI_HELPERS=y \ + CONFIG_SAMPLES=y \ + CONFIG_SAMPLE_QMI_CLIENT=m \ + CONFIG_SAMPLE_TRACE_EVENTS=n \ + CONFIG_SAMPLE_KOBJECT=n \ + CONFIG_SAMPLE_KPROBES=n \ + CONFIG_SAMPLE_KRETPROBES=n \ + CONFIG_SAMPLE_HW_BREAKPOINT=n \ + CONFIG_SAMPLE_KFIFO=n \ + CONFIG_SAMPLE_CONFIGFS=n \ + CONFIG_SAMPLE_RPMSG_CLIENT=n \ + CONFIG_MAILBOX=y \ + CONFIG_DIAG_OVER_QRTR=y +endef + +define KernelPackage/qrtr_mproc/description +Kernel configs for ath11k support specific to ipq807x and IPQ60xx +endef + +$(eval $(call KernelPackage,qrtr_mproc)) diff --git a/target/linux/ipq807x/patches/100-qrtr-ns.patch b/target/linux/ipq807x/patches/100-qrtr-ns.patch new file mode 100644 index 0000000000..850e644778 --- /dev/null +++ b/target/linux/ipq807x/patches/100-qrtr-ns.patch @@ -0,0 +1,976 @@ +Index: linux-4.4.60/net/qrtr/ns.c +=================================================================== +--- /dev/null ++++ linux-4.4.60/net/qrtr/ns.c +@@ -0,0 +1,760 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause ++/* ++ * Copyright (c) 2015, Sony Mobile Communications Inc. ++ * Copyright (c) 2013, The Linux Foundation. All rights reserved. ++ * Copyright (c) 2020, Linaro Ltd. 
++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "qrtr.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++static RADIX_TREE(nodes, GFP_KERNEL); ++ ++static struct { ++ struct socket *sock; ++ struct sockaddr_qrtr bcast_sq; ++ struct list_head lookups; ++ struct workqueue_struct *workqueue; ++ struct work_struct work; ++ int local_node; ++} qrtr_ns; ++ ++static const char * const qrtr_ctrl_pkt_strings[] = { ++ [QRTR_TYPE_HELLO] = "hello", ++ [QRTR_TYPE_BYE] = "bye", ++ [QRTR_TYPE_NEW_SERVER] = "new-server", ++ [QRTR_TYPE_DEL_SERVER] = "del-server", ++ [QRTR_TYPE_DEL_CLIENT] = "del-client", ++ [QRTR_TYPE_RESUME_TX] = "resume-tx", ++ [QRTR_TYPE_EXIT] = "exit", ++ [QRTR_TYPE_PING] = "ping", ++ [QRTR_TYPE_NEW_LOOKUP] = "new-lookup", ++ [QRTR_TYPE_DEL_LOOKUP] = "del-lookup", ++}; ++ ++struct qrtr_server_filter { ++ unsigned int service; ++ unsigned int instance; ++ unsigned int ifilter; ++}; ++ ++struct qrtr_lookup { ++ unsigned int service; ++ unsigned int instance; ++ ++ struct sockaddr_qrtr sq; ++ struct list_head li; ++}; ++ ++struct qrtr_server { ++ unsigned int service; ++ unsigned int instance; ++ ++ unsigned int node; ++ unsigned int port; ++ ++ struct list_head qli; ++}; ++ ++struct qrtr_node { ++ unsigned int id; ++ struct radix_tree_root servers; ++}; ++ ++static struct qrtr_node *node_get(unsigned int node_id) ++{ ++ struct qrtr_node *node; ++ ++ node = radix_tree_lookup(&nodes, node_id); ++ if (node) ++ return node; ++ ++ /* If node didn't exist, allocate and insert it to the tree */ ++ node = kzalloc(sizeof(*node), GFP_KERNEL); ++ if (!node) ++ return NULL; ++ ++ node->id = node_id; ++ ++ radix_tree_insert(&nodes, node_id, node); ++ ++ return node; ++} ++ ++static int server_match(const struct qrtr_server *srv, ++ const struct qrtr_server_filter *f) ++{ ++ unsigned int ifilter = f->ifilter; ++ ++ if (f->service != 0 && srv->service != f->service) ++ return 0; ++ if (!ifilter && f->instance) ++ ifilter = ~0; ++ ++ return (srv->instance & ifilter) == f->instance; ++} ++ ++static int service_announce_new(struct sockaddr_qrtr *dest, ++ struct qrtr_server *srv) ++{ ++ struct qrtr_ctrl_pkt pkt; ++ struct msghdr msg = { }; ++ struct kvec iv; ++ ++ trace_qrtr_ns_service_announce_new(srv->service, srv->instance, ++ srv->node, srv->port); ++ ++ iv.iov_base = &pkt; ++ iv.iov_len = sizeof(pkt); ++ ++ memset(&pkt, 0, sizeof(pkt)); ++ pkt.cmd = cpu_to_le32(QRTR_TYPE_NEW_SERVER); ++ pkt.server.service = cpu_to_le32(srv->service); ++ pkt.server.instance = cpu_to_le32(srv->instance); ++ pkt.server.node = cpu_to_le32(srv->node); ++ pkt.server.port = cpu_to_le32(srv->port); ++ ++ msg.msg_name = (struct sockaddr *)dest; ++ msg.msg_namelen = sizeof(*dest); ++ ++ return kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); ++} ++ ++static int service_announce_del(struct sockaddr_qrtr *dest, ++ struct qrtr_server *srv) ++{ ++ struct qrtr_ctrl_pkt pkt; ++ struct msghdr msg = { }; ++ struct kvec iv; ++ int ret; ++ ++ trace_qrtr_ns_service_announce_del(srv->service, srv->instance, ++ srv->node, srv->port); ++ ++ iv.iov_base = &pkt; ++ iv.iov_len = sizeof(pkt); ++ ++ memset(&pkt, 0, sizeof(pkt)); ++ pkt.cmd = cpu_to_le32(QRTR_TYPE_DEL_SERVER); ++ pkt.server.service = cpu_to_le32(srv->service); ++ pkt.server.instance = cpu_to_le32(srv->instance); ++ pkt.server.node = cpu_to_le32(srv->node); ++ pkt.server.port = cpu_to_le32(srv->port); ++ ++ msg.msg_name = (struct sockaddr *)dest; ++ msg.msg_namelen = sizeof(*dest); ++ ++ ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); ++ 
if (ret < 0) ++ pr_err("failed to announce del service\n"); ++ ++ return ret; ++} ++ ++static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv, ++ bool new) ++{ ++ struct qrtr_ctrl_pkt pkt; ++ struct msghdr msg = { }; ++ struct kvec iv; ++ int ret; ++ ++ iv.iov_base = &pkt; ++ iv.iov_len = sizeof(pkt); ++ ++ memset(&pkt, 0, sizeof(pkt)); ++ pkt.cmd = new ? cpu_to_le32(QRTR_TYPE_NEW_SERVER) : ++ cpu_to_le32(QRTR_TYPE_DEL_SERVER); ++ if (srv) { ++ pkt.server.service = cpu_to_le32(srv->service); ++ pkt.server.instance = cpu_to_le32(srv->instance); ++ pkt.server.node = cpu_to_le32(srv->node); ++ pkt.server.port = cpu_to_le32(srv->port); ++ } ++ ++ msg.msg_name = (struct sockaddr *)to; ++ msg.msg_namelen = sizeof(*to); ++ ++ ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); ++ if (ret < 0) ++ pr_err("failed to send lookup notification\n"); ++} ++ ++static int announce_servers(struct sockaddr_qrtr *sq) ++{ ++ struct radix_tree_iter iter; ++ struct qrtr_server *srv; ++ struct qrtr_node *node; ++ void __rcu **slot; ++ int ret; ++ ++ node = node_get(qrtr_ns.local_node); ++ if (!node) ++ return 0; ++ ++ /* Announce the list of servers registered in this node */ ++ radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { ++ srv = radix_tree_deref_slot(slot); ++ ++ ret = service_announce_new(sq, srv); ++ if (ret < 0) { ++ pr_err("failed to announce new service\n"); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static struct qrtr_server *server_add(unsigned int service, ++ unsigned int instance, ++ unsigned int node_id, ++ unsigned int port) ++{ ++ struct qrtr_server *srv; ++ struct qrtr_server *old; ++ struct qrtr_node *node; ++ ++ if (!service || !port) ++ return NULL; ++ ++ srv = kzalloc(sizeof(*srv), GFP_KERNEL); ++ if (!srv) ++ return NULL; ++ ++ srv->service = service; ++ srv->instance = instance; ++ srv->node = node_id; ++ srv->port = port; ++ ++ node = node_get(node_id); ++ if (!node) ++ goto err; ++ ++ /* Delete the old server on the same port */ ++ old = radix_tree_lookup(&node->servers, port); ++ if (old) { ++ radix_tree_delete(&node->servers, port); ++ kfree(old); ++ } ++ ++ radix_tree_insert(&node->servers, port, srv); ++ ++ trace_qrtr_ns_server_add(srv->service, srv->instance, ++ srv->node, srv->port); ++ ++ return srv; ++ ++err: ++ kfree(srv); ++ return NULL; ++} ++ ++static int server_del(struct qrtr_node *node, unsigned int port) ++{ ++ struct qrtr_lookup *lookup; ++ struct qrtr_server *srv; ++ struct list_head *li; ++ ++ srv = radix_tree_lookup(&node->servers, port); ++ if (!srv) ++ return -ENOENT; ++ ++ radix_tree_delete(&node->servers, port); ++ ++ /* Broadcast the removal of local servers */ ++ if (srv->node == qrtr_ns.local_node) ++ service_announce_del(&qrtr_ns.bcast_sq, srv); ++ ++ /* Announce the service's disappearance to observers */ ++ list_for_each(li, &qrtr_ns.lookups) { ++ lookup = container_of(li, struct qrtr_lookup, li); ++ if (lookup->service && lookup->service != srv->service) ++ continue; ++ if (lookup->instance && lookup->instance != srv->instance) ++ continue; ++ ++ lookup_notify(&lookup->sq, srv, false); ++ } ++ ++ kfree(srv); ++ ++ return 0; ++} ++ ++static int say_hello(struct sockaddr_qrtr *dest) ++{ ++ struct qrtr_ctrl_pkt pkt; ++ struct msghdr msg = { }; ++ struct kvec iv; ++ int ret; ++ ++ iv.iov_base = &pkt; ++ iv.iov_len = sizeof(pkt); ++ ++ memset(&pkt, 0, sizeof(pkt)); ++ pkt.cmd = cpu_to_le32(QRTR_TYPE_HELLO); ++ ++ msg.msg_name = (struct sockaddr *)dest; ++ msg.msg_namelen = sizeof(*dest); ++ ++ ret = 
kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); ++ if (ret < 0) ++ pr_err("failed to send hello msg\n"); ++ ++ return ret; ++} ++ ++/* Announce the list of servers registered on the local node */ ++static int ctrl_cmd_hello(struct sockaddr_qrtr *sq) ++{ ++ int ret; ++ ++ ret = say_hello(sq); ++ if (ret < 0) ++ return ret; ++ ++ return announce_servers(sq); ++} ++ ++static int ctrl_cmd_bye(struct sockaddr_qrtr *from) ++{ ++ struct qrtr_node *local_node; ++ struct radix_tree_iter iter; ++ struct qrtr_ctrl_pkt pkt; ++ struct qrtr_server *srv; ++ struct sockaddr_qrtr sq; ++ struct msghdr msg = { }; ++ struct qrtr_node *node; ++ void __rcu **slot; ++ struct kvec iv; ++ int ret; ++ ++ iv.iov_base = &pkt; ++ iv.iov_len = sizeof(pkt); ++ ++ node = node_get(from->sq_node); ++ if (!node) ++ return 0; ++ ++ /* Advertise removal of this client to all servers of remote node */ ++ radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { ++ srv = radix_tree_deref_slot(slot); ++ server_del(node, srv->port); ++ } ++ ++ /* Advertise the removal of this client to all local servers */ ++ local_node = node_get(qrtr_ns.local_node); ++ if (!local_node) ++ return 0; ++ ++ memset(&pkt, 0, sizeof(pkt)); ++ pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); ++ pkt.client.node = cpu_to_le32(from->sq_node); ++ ++ radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { ++ srv = radix_tree_deref_slot(slot); ++ ++ sq.sq_family = AF_QIPCRTR; ++ sq.sq_node = srv->node; ++ sq.sq_port = srv->port; ++ ++ msg.msg_name = (struct sockaddr *)&sq; ++ msg.msg_namelen = sizeof(sq); ++ ++ ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); ++ if (ret < 0) { ++ pr_err("failed to send bye cmd\n"); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, ++ unsigned int node_id, unsigned int port) ++{ ++ struct qrtr_node *local_node; ++ struct radix_tree_iter iter; ++ struct qrtr_lookup *lookup; ++ struct qrtr_ctrl_pkt pkt; ++ struct msghdr msg = { }; ++ struct qrtr_server *srv; ++ struct sockaddr_qrtr sq; ++ struct qrtr_node *node; ++ struct list_head *tmp; ++ struct list_head *li; ++ void __rcu **slot; ++ struct kvec iv; ++ int ret; ++ ++ iv.iov_base = &pkt; ++ iv.iov_len = sizeof(pkt); ++ ++ /* Don't accept spoofed messages */ ++ if (from->sq_node != node_id) ++ return -EINVAL; ++ ++ /* Local DEL_CLIENT messages comes from the port being closed */ ++ if (from->sq_node == qrtr_ns.local_node && from->sq_port != port) ++ return -EINVAL; ++ ++ /* Remove any lookups by this client */ ++ list_for_each_safe(li, tmp, &qrtr_ns.lookups) { ++ lookup = container_of(li, struct qrtr_lookup, li); ++ if (lookup->sq.sq_node != node_id) ++ continue; ++ if (lookup->sq.sq_port != port) ++ continue; ++ ++ list_del(&lookup->li); ++ kfree(lookup); ++ } ++ ++ /* Remove the server belonging to this port */ ++ node = node_get(node_id); ++ if (node) ++ server_del(node, port); ++ ++ /* Advertise the removal of this client to all local servers */ ++ local_node = node_get(qrtr_ns.local_node); ++ if (!local_node) ++ return 0; ++ ++ memset(&pkt, 0, sizeof(pkt)); ++ pkt.cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); ++ pkt.client.node = cpu_to_le32(node_id); ++ pkt.client.port = cpu_to_le32(port); ++ ++ radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { ++ srv = radix_tree_deref_slot(slot); ++ ++ sq.sq_family = AF_QIPCRTR; ++ sq.sq_node = srv->node; ++ sq.sq_port = srv->port; ++ ++ msg.msg_name = (struct sockaddr *)&sq; ++ msg.msg_namelen = sizeof(sq); ++ ++ ret = 
kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); ++ if (ret < 0) { ++ pr_err("failed to send del client cmd\n"); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int ctrl_cmd_new_server(struct sockaddr_qrtr *from, ++ unsigned int service, unsigned int instance, ++ unsigned int node_id, unsigned int port) ++{ ++ struct qrtr_lookup *lookup; ++ struct qrtr_server *srv; ++ struct list_head *li; ++ int ret = 0; ++ ++ /* Ignore specified node and port for local servers */ ++ if (from->sq_node == qrtr_ns.local_node) { ++ node_id = from->sq_node; ++ port = from->sq_port; ++ } ++ ++ /* Don't accept spoofed messages */ ++ if (from->sq_node != node_id) ++ return -EINVAL; ++ ++ srv = server_add(service, instance, node_id, port); ++ if (!srv) ++ return -EINVAL; ++ ++ if (srv->node == qrtr_ns.local_node) { ++ ret = service_announce_new(&qrtr_ns.bcast_sq, srv); ++ if (ret < 0) { ++ pr_err("failed to announce new service\n"); ++ return ret; ++ } ++ } ++ ++ /* Notify any potential lookups about the new server */ ++ list_for_each(li, &qrtr_ns.lookups) { ++ lookup = container_of(li, struct qrtr_lookup, li); ++ if (lookup->service && lookup->service != service) ++ continue; ++ if (lookup->instance && lookup->instance != instance) ++ continue; ++ ++ lookup_notify(&lookup->sq, srv, true); ++ } ++ ++ return ret; ++} ++ ++static int ctrl_cmd_del_server(struct sockaddr_qrtr *from, ++ unsigned int service, unsigned int instance, ++ unsigned int node_id, unsigned int port) ++{ ++ struct qrtr_node *node; ++ ++ /* Ignore specified node and port for local servers*/ ++ if (from->sq_node == qrtr_ns.local_node) { ++ node_id = from->sq_node; ++ port = from->sq_port; ++ } ++ ++ /* Don't accept spoofed messages */ ++ if (from->sq_node != node_id) ++ return -EINVAL; ++ ++ /* Local servers may only unregister themselves */ ++ if (from->sq_node == qrtr_ns.local_node && from->sq_port != port) ++ return -EINVAL; ++ ++ node = node_get(node_id); ++ if (!node) ++ return -ENOENT; ++ ++ return server_del(node, port); ++} ++ ++static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, ++ unsigned int service, unsigned int instance) ++{ ++ struct radix_tree_iter node_iter; ++ struct qrtr_server_filter filter; ++ struct radix_tree_iter srv_iter; ++ struct qrtr_lookup *lookup; ++ struct qrtr_node *node; ++ void __rcu **node_slot; ++ void __rcu **srv_slot; ++ ++ /* Accept only local observers */ ++ if (from->sq_node != qrtr_ns.local_node) ++ return -EINVAL; ++ ++ lookup = kzalloc(sizeof(*lookup), GFP_KERNEL); ++ if (!lookup) ++ return -ENOMEM; ++ ++ lookup->sq = *from; ++ lookup->service = service; ++ lookup->instance = instance; ++ list_add_tail(&lookup->li, &qrtr_ns.lookups); ++ ++ memset(&filter, 0, sizeof(filter)); ++ filter.service = service; ++ filter.instance = instance; ++ ++ radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) { ++ node = radix_tree_deref_slot(node_slot); ++ ++ radix_tree_for_each_slot(srv_slot, &node->servers, ++ &srv_iter, 0) { ++ struct qrtr_server *srv; ++ ++ srv = radix_tree_deref_slot(srv_slot); ++ if (!server_match(srv, &filter)) ++ continue; ++ ++ lookup_notify(from, srv, true); ++ } ++ } ++ ++ /* Empty notification, to indicate end of listing */ ++ lookup_notify(from, NULL, true); ++ ++ return 0; ++} ++ ++static void ctrl_cmd_del_lookup(struct sockaddr_qrtr *from, ++ unsigned int service, unsigned int instance) ++{ ++ struct qrtr_lookup *lookup; ++ struct list_head *tmp; ++ struct list_head *li; ++ ++ list_for_each_safe(li, tmp, &qrtr_ns.lookups) { ++ lookup = container_of(li, struct 
qrtr_lookup, li); ++ if (lookup->sq.sq_node != from->sq_node) ++ continue; ++ if (lookup->sq.sq_port != from->sq_port) ++ continue; ++ if (lookup->service != service) ++ continue; ++ if (lookup->instance && lookup->instance != instance) ++ continue; ++ ++ list_del(&lookup->li); ++ kfree(lookup); ++ } ++} ++ ++static void qrtr_ns_worker(struct work_struct *work) ++{ ++ const struct qrtr_ctrl_pkt *pkt; ++ size_t recv_buf_size = 4096; ++ struct sockaddr_qrtr sq; ++ struct msghdr msg = { }; ++ unsigned int cmd; ++ ssize_t msglen; ++ void *recv_buf; ++ struct kvec iv; ++ int ret; ++ ++ msg.msg_name = (struct sockaddr *)&sq; ++ msg.msg_namelen = sizeof(sq); ++ ++ recv_buf = kzalloc(recv_buf_size, GFP_KERNEL); ++ if (!recv_buf) ++ return; ++ ++ for (;;) { ++ iv.iov_base = recv_buf; ++ iv.iov_len = recv_buf_size; ++ ++ msglen = kernel_recvmsg(qrtr_ns.sock, &msg, &iv, 1, ++ iv.iov_len, MSG_DONTWAIT); ++ ++ if (msglen == -EAGAIN) ++ break; ++ ++ if (msglen < 0) { ++ pr_err("error receiving packet: %zd\n", msglen); ++ break; ++ } ++ ++ pkt = recv_buf; ++ cmd = le32_to_cpu(pkt->cmd); ++ if (cmd < ARRAY_SIZE(qrtr_ctrl_pkt_strings) && ++ qrtr_ctrl_pkt_strings[cmd]) ++ trace_qrtr_ns_message(qrtr_ctrl_pkt_strings[cmd], ++ sq.sq_node, sq.sq_port); ++ ++ ret = 0; ++ switch (cmd) { ++ case QRTR_TYPE_HELLO: ++ ret = ctrl_cmd_hello(&sq); ++ break; ++ case QRTR_TYPE_BYE: ++ ret = ctrl_cmd_bye(&sq); ++ break; ++ case QRTR_TYPE_DEL_CLIENT: ++ ret = ctrl_cmd_del_client(&sq, ++ le32_to_cpu(pkt->client.node), ++ le32_to_cpu(pkt->client.port)); ++ break; ++ case QRTR_TYPE_NEW_SERVER: ++ ret = ctrl_cmd_new_server(&sq, ++ le32_to_cpu(pkt->server.service), ++ le32_to_cpu(pkt->server.instance), ++ le32_to_cpu(pkt->server.node), ++ le32_to_cpu(pkt->server.port)); ++ break; ++ case QRTR_TYPE_DEL_SERVER: ++ ret = ctrl_cmd_del_server(&sq, ++ le32_to_cpu(pkt->server.service), ++ le32_to_cpu(pkt->server.instance), ++ le32_to_cpu(pkt->server.node), ++ le32_to_cpu(pkt->server.port)); ++ break; ++ case QRTR_TYPE_EXIT: ++ case QRTR_TYPE_PING: ++ case QRTR_TYPE_RESUME_TX: ++ break; ++ case QRTR_TYPE_NEW_LOOKUP: ++ ret = ctrl_cmd_new_lookup(&sq, ++ le32_to_cpu(pkt->server.service), ++ le32_to_cpu(pkt->server.instance)); ++ break; ++ case QRTR_TYPE_DEL_LOOKUP: ++ ctrl_cmd_del_lookup(&sq, ++ le32_to_cpu(pkt->server.service), ++ le32_to_cpu(pkt->server.instance)); ++ break; ++ } ++ ++ if (ret < 0) ++ pr_err("failed while handling packet from %d:%d", ++ sq.sq_node, sq.sq_port); ++ } ++ ++ kfree(recv_buf); ++} ++ ++static void qrtr_ns_data_ready(struct sock *sk) ++{ ++ queue_work(qrtr_ns.workqueue, &qrtr_ns.work); ++} ++ ++void qrtr_ns_init(void) ++{ ++ struct sockaddr_qrtr sq; ++ int sl = sizeof(sq); ++ int ret; ++ ++ INIT_LIST_HEAD(&qrtr_ns.lookups); ++ INIT_WORK(&qrtr_ns.work, qrtr_ns_worker); ++ ++ ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM, ++ PF_QIPCRTR, &qrtr_ns.sock); ++ if (ret < 0) ++ return; ++ ++ ret = kernel_getsockname(qrtr_ns.sock, (struct sockaddr *)&sq, &sl); ++ if (ret < 0) { ++ pr_err("failed to get socket name\n"); ++ goto err_sock; ++ } ++ ++ qrtr_ns.workqueue = alloc_workqueue("qrtr_ns_handler", WQ_UNBOUND, 1); ++ if (!qrtr_ns.workqueue) ++ goto err_sock; ++ ++ qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready; ++ ++ sq.sq_port = QRTR_PORT_CTRL; ++ qrtr_ns.local_node = sq.sq_node; ++ ++ ret = kernel_bind(qrtr_ns.sock, (struct sockaddr *)&sq, sizeof(sq)); ++ if (ret < 0) { ++ pr_err("failed to bind to socket\n"); ++ goto err_wq; ++ } ++ ++ qrtr_ns.bcast_sq.sq_family = AF_QIPCRTR; ++ 
qrtr_ns.bcast_sq.sq_node = QRTR_NODE_BCAST; ++ qrtr_ns.bcast_sq.sq_port = QRTR_PORT_CTRL; ++ ++ ret = say_hello(&qrtr_ns.bcast_sq); ++ if (ret < 0) ++ goto err_wq; ++ ++ return; ++ ++err_wq: ++ destroy_workqueue(qrtr_ns.workqueue); ++err_sock: ++ sock_release(qrtr_ns.sock); ++} ++EXPORT_SYMBOL_GPL(qrtr_ns_init); ++ ++void qrtr_ns_remove(void) ++{ ++ cancel_work_sync(&qrtr_ns.work); ++ destroy_workqueue(qrtr_ns.workqueue); ++ sock_release(qrtr_ns.sock); ++} ++EXPORT_SYMBOL_GPL(qrtr_ns_remove); ++ ++MODULE_AUTHOR("Manivannan Sadhasivam "); ++MODULE_DESCRIPTION("Qualcomm IPC Router Nameservice"); ++MODULE_LICENSE("Dual BSD/GPL"); +Index: linux-4.4.60/net/qrtr/qrtr.c +=================================================================== +--- linux-4.4.60.orig/net/qrtr/qrtr.c ++++ linux-4.4.60/net/qrtr/qrtr.c +@@ -135,6 +135,8 @@ static DEFINE_IDR(qrtr_ports); + static DEFINE_MUTEX(qrtr_port_lock); + static DEFINE_MUTEX(qrtr_node_locking); + ++static struct delayed_work qrtr_ns_work; ++ + /** + * struct qrtr_node - endpoint node + * @ep_lock: lock for endpoint management and callbacks +@@ -1765,33 +1767,6 @@ static int qrtr_create(struct net *net, + return 0; + } + +-static const struct nla_policy qrtr_policy[IFA_MAX + 1] = { +- [IFA_LOCAL] = { .type = NLA_U32 }, +-}; +- +-static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) +-{ +- struct nlattr *tb[IFA_MAX + 1]; +- struct ifaddrmsg *ifm; +- int rc; +- +- if (!netlink_capable(skb, CAP_NET_ADMIN)) +- return -EPERM; +- +- ASSERT_RTNL(); +- +- rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy); +- if (rc < 0) +- return rc; +- +- ifm = nlmsg_data(nlh); +- if (!tb[IFA_LOCAL]) +- return -EINVAL; +- +- qrtr_local_nid = nla_get_u32(tb[IFA_LOCAL]); +- return 0; +-} +- + static const struct net_proto_family qrtr_family = { + .owner = THIS_MODULE, + .family = AF_QIPCRTR, +@@ -1811,7 +1786,8 @@ static int __init qrtr_proto_init(void) + proto_unregister(&qrtr_proto); + return rc; + } +- rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL); ++ ++ qrtr_ns_init(); + + return 0; + } +@@ -1819,7 +1795,8 @@ postcore_initcall(qrtr_proto_init); + + static void __exit qrtr_proto_fini(void) + { +- rtnl_unregister(PF_QIPCRTR, RTM_NEWADDR); ++ cancel_delayed_work_sync(&qrtr_ns_work); ++ qrtr_ns_remove(); + sock_unregister(qrtr_family.family); + proto_unregister(&qrtr_proto); + } +Index: linux-4.4.60/net/qrtr/qrtr.h +=================================================================== +--- linux-4.4.60.orig/net/qrtr/qrtr.h ++++ linux-4.4.60/net/qrtr/qrtr.h +@@ -33,4 +33,9 @@ void qrtr_endpoint_unregister(struct qrt + int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); + + int qrtr_peek_pkt_size(const void *data); ++ ++void qrtr_ns_init(void); ++ ++void qrtr_ns_remove(void); ++ + #endif +Index: linux-4.4.60/net/qrtr/Makefile +=================================================================== +--- linux-4.4.60.orig/net/qrtr/Makefile ++++ linux-4.4.60/net/qrtr/Makefile +@@ -1,4 +1,4 @@ +-obj-$(CONFIG_QRTR) := qrtr.o ++obj-$(CONFIG_QRTR) := qrtr.o ns.o + + obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o + qrtr-smd-y := smd.o +Index: linux-4.4.60/include/trace/events/qrtr.h +=================================================================== +--- /dev/null ++++ linux-4.4.60/include/trace/events/qrtr.h +@@ -0,0 +1,115 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM qrtr ++ ++#if !defined(_TRACE_QRTR_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_QRTR_H ++ ++#include 
++#include ++ ++TRACE_EVENT(qrtr_ns_service_announce_new, ++ ++ TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port), ++ ++ TP_ARGS(service, instance, node, port), ++ ++ TP_STRUCT__entry( ++ __field(__le32, service) ++ __field(__le32, instance) ++ __field(__le32, node) ++ __field(__le32, port) ++ ), ++ ++ TP_fast_assign( ++ __entry->service = service; ++ __entry->instance = instance; ++ __entry->node = node; ++ __entry->port = port; ++ ), ++ ++ TP_printk("advertising new server [%d:%x]@[%d:%d]", ++ __entry->service, __entry->instance, __entry->node, ++ __entry->port ++ ) ++); ++ ++TRACE_EVENT(qrtr_ns_service_announce_del, ++ ++ TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port), ++ ++ TP_ARGS(service, instance, node, port), ++ ++ TP_STRUCT__entry( ++ __field(__le32, service) ++ __field(__le32, instance) ++ __field(__le32, node) ++ __field(__le32, port) ++ ), ++ ++ TP_fast_assign( ++ __entry->service = service; ++ __entry->instance = instance; ++ __entry->node = node; ++ __entry->port = port; ++ ), ++ ++ TP_printk("advertising removal of server [%d:%x]@[%d:%d]", ++ __entry->service, __entry->instance, __entry->node, ++ __entry->port ++ ) ++); ++ ++TRACE_EVENT(qrtr_ns_server_add, ++ ++ TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port), ++ ++ TP_ARGS(service, instance, node, port), ++ ++ TP_STRUCT__entry( ++ __field(__le32, service) ++ __field(__le32, instance) ++ __field(__le32, node) ++ __field(__le32, port) ++ ), ++ ++ TP_fast_assign( ++ __entry->service = service; ++ __entry->instance = instance; ++ __entry->node = node; ++ __entry->port = port; ++ ), ++ ++ TP_printk("add server [%d:%x]@[%d:%d]", ++ __entry->service, __entry->instance, __entry->node, ++ __entry->port ++ ) ++); ++ ++TRACE_EVENT(qrtr_ns_message, ++ ++ TP_PROTO(const char * const ctrl_pkt_str, __u32 sq_node, __u32 sq_port), ++ ++ TP_ARGS(ctrl_pkt_str, sq_node, sq_port), ++ ++ TP_STRUCT__entry( ++ __string(ctrl_pkt_str, ctrl_pkt_str) ++ __field(__u32, sq_node) ++ __field(__u32, sq_port) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(ctrl_pkt_str, ctrl_pkt_str); ++ __entry->sq_node = sq_node; ++ __entry->sq_port = sq_port; ++ ), ++ ++ TP_printk("%s from %d:%d", ++ __get_str(ctrl_pkt_str), __entry->sq_node, __entry->sq_port ++ ) ++); ++ ++#endif /* _TRACE_QRTR_H */ ++ ++/* This part must be outside protection */ ++#include diff --git a/target/linux/ipq807x/patches/101-squashfs.patch b/target/linux/ipq807x/patches/101-squashfs.patch new file mode 100644 index 0000000000..5744a2a5bf --- /dev/null +++ b/target/linux/ipq807x/patches/101-squashfs.patch @@ -0,0 +1,16 @@ +Index: linux-4.4.60/fs/squashfs/xz_wrapper.c +=================================================================== +--- linux-4.4.60.orig/fs/squashfs/xz_wrapper.c ++++ linux-4.4.60/fs/squashfs/xz_wrapper.c +@@ -40,10 +40,8 @@ struct squashfs_xz { + }; + + struct disk_comp_opts { +- __le32 flags; +- __le16 bit_opts; +- __le16 fb; + __le32 dictionary_size; ++ __le32 flags; + }; + + struct comp_opts { diff --git a/target/linux/ipq807x/patches/102-cig-wf188.patch b/target/linux/ipq807x/patches/102-cig-wf188.patch new file mode 100644 index 0000000000..05c736dfd7 --- /dev/null +++ b/target/linux/ipq807x/patches/102-cig-wf188.patch @@ -0,0 +1,869 @@ +Index: linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/Makefile +=================================================================== +--- linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1.orig/arch/arm64/boot/dts/qcom/Makefile ++++ 
linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/Makefile +@@ -30,6 +30,8 @@ dtb-$(CONFIG_ARCH_QCOM) += \ + qcom-ipq6018-cp01-c4.dtb \ + qcom-ipq6018-cp02-c1.dtb \ + qcom-ipq6018-cp03-c1.dtb \ ++ qcom-ipq6018-cig-wf188.dts \ ++ qcom-ipq6018-cig-wf188n.dts \ + qcom-ipq807x-hk01.dtb \ + qcom-ipq807x-hk01.c2.dtb \ + qcom-ipq807x-hk01.c3.dtb \ +Index: linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/qcom-ipq6018-cig-wf188.dts +=================================================================== +--- /dev/null ++++ linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/qcom-ipq6018-cig-wf188.dts +@@ -0,0 +1,423 @@ ++/dts-v1/; ++/* ++ * Copyright (c) 2019, The Linux Foundation. All rights reserved. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ++ */ ++ ++#include "qcom-ipq6018.dtsi" ++#include "qcom-ipq6018-rpm-regulator.dtsi" ++#include "qcom-ipq6018-cpr-regulator.dtsi" ++#include "qcom-ipq6018-cp-cpu.dtsi" ++#include ++#include ++ ++/ { ++ #address-cells = <0x2>; ++ #size-cells = <0x2>; ++ model = "Cigtech WF-188"; ++ compatible = "cig,wf188", "qcom,ipq6018-cp03", "qcom,ipq6018"; ++ interrupt-parent = <&intc>; ++ ++ aliases { ++ serial0 = &blsp1_uart3; ++ serial1 = &blsp1_uart2; ++ ++ /* ++ * Aliases as required by u-boot ++ * to patch MAC addresses ++ */ ++ ethernet0 = "/soc/dp1"; ++ ethernet1 = "/soc/dp2"; ++ ++ led-boot = &led_power; ++ led-failsafe = &led_power; ++ led-running = &led_power; ++ led-upgrade = &led_power; ++ }; ++ ++ chosen { ++ bootargs = "console=ttyMSM0,115200,n8 rw init=/init"; ++ bootargs-append = " swiotlb=1 coherent_pool=2M"; ++ }; ++ ++ /* ++ * +=========+==============+========================+ ++ * | | | | ++ * | Region | Start Offset | Size | ++ * | | | | ++ * +--------+--------------+-------------------------+ ++ * | | | | ++ * | | | | ++ * | | | | ++ * | | | | ++ * | Linux | 0x41000000 | 139MB | ++ * | | | | ++ * | | | | ++ * | | | | ++ * +--------+--------------+-------------------------+ ++ * | TZ App | 0x49B00000 | 6MB | ++ * +--------+--------------+-------------------------+ ++ * ++ * From the available 145 MB for Linux in the first 256 MB, ++ * we are reserving 6 MB for TZAPP. ++ * ++ * Refer arch/arm64/boot/dts/qcom/qcom-ipq6018-memory.dtsi ++ * for memory layout. 
++ */ ++ ++/* TZAPP is enabled only in default memory profile */ ++#if !defined(__IPQ_MEM_PROFILE_256_MB__) && !defined(__IPQ_MEM_PROFILE_512_MB__) ++ reserved-memory { ++ tzapp:tzapp@49B00000 { /* TZAPPS */ ++ no-map; ++ reg = <0x0 0x49B00000 0x0 0x00600000>; ++ }; ++ }; ++#endif ++}; ++ ++&tlmm { ++ uart_pins: uart_pins { ++ mux { ++ pins = "gpio44", "gpio45"; ++ function = "blsp2_uart"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ spi_0_pins: spi_0_pins { ++ mux { ++ pins = "gpio38", "gpio39", "gpio40", "gpio41"; ++ function = "blsp0_spi"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ qpic_pins: qpic_pins { ++ data_0 { ++ pins = "gpio15"; ++ function = "qpic_pad0"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_1 { ++ pins = "gpio12"; ++ function = "qpic_pad1"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_2 { ++ pins = "gpio13"; ++ function = "qpic_pad2"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_3 { ++ pins = "gpio14"; ++ function = "qpic_pad3"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_4 { ++ pins = "gpio5"; ++ function = "qpic_pad4"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_5 { ++ pins = "gpio6"; ++ function = "qpic_pad5"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_6 { ++ pins = "gpio7"; ++ function = "qpic_pad6"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_7 { ++ pins = "gpio8"; ++ function = "qpic_pad7"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ qpic_pad { ++ pins = "gpio1", "gpio3", "gpio4", ++ "gpio10", "gpio11", "gpio17"; ++ function = "qpic_pad"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ button_pins: button_pins { ++ wps_button { ++ pins = "gpio22"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ mdio_pins: mdio_pinmux { ++ mux_0 { ++ pins = "gpio64"; ++ function = "mdc"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_1 { ++ pins = "gpio65"; ++ function = "mdio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_2 { ++ pins = "gpio75"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ }; ++ ++ leds_pins: leds_pins { ++ led_5g { ++ pins = "gpio25"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_2g { ++ pins = "gpio24"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_eth { ++ pins = "gpio18"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_pwr { ++ pins = "gpio16"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ uart2_pins: uart2_pins { ++ mux { ++ pins = "gpio57", "gpio58"; ++ function = "blsp4_uart"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++}; ++ ++&soc { ++ mdio@90000 { ++ pinctrl-0 = <&mdio_pins>; ++ pinctrl-names = "default"; ++ phy-reset-gpio = <&tlmm 75 0>; ++ status = "ok"; ++ phy0: ethernet-phy@0 { ++ reg = <3>; ++ }; ++ phy1: ethernet-phy@1 { ++ reg = <4>; ++ }; ++ }; ++ ++ ++ ess-switch@3a000000 { ++ switch_cpu_bmp = <0x1>; /* cpu port bitmap */ ++ switch_lan_bmp = <0x08>; /* lan port bitmap */ ++ switch_wan_bmp = <0x10>; /* wan port bitmap */ ++ switch_inner_bmp = <0xc0>; /*inner port bitmap*/ ++ switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ ++ switch_mac_mode1 = <0xff>; /* mac mode for uniphy instance1*/ ++ switch_mac_mode2 = <0xff>; /* mac mode for uniphy instance2*/ ++ qcom,port_phyinfo { ++ port@3 { ++ port_id = <3>; ++ phy_address = <4>; ++ }; ++ port@4 { ++ port_id = <4>; ++ phy_address = <3>; ++ }; 
++ }; ++ }; ++ ++ dp1 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <3>; ++ reg = <0x3a001400 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <4>; ++ phy-mode = "sgmii"; ++ }; ++ ++ dp2 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <4>; ++ reg = <0x3a001600 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <3>; ++ phy-mode = "sgmii"; ++ ++ }; ++ ++ gpio_keys { ++ compatible = "gpio-keys"; ++ pinctrl-0 = <&button_pins>; ++ pinctrl-names = "default"; ++ ++ wps { ++ label = "reset"; ++ linux,code = ; ++ gpios = <&tlmm 22 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ debounce-interval = <60>; ++ }; ++ }; ++ ++ leds { ++ compatible = "gpio-leds"; ++ pinctrl-0 = <&leds_pins>; ++ pinctrl-names = "default"; ++ ++ led@25 { ++ label = "green:wifi5"; ++ gpios = <&tlmm 25 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:5g"; ++ default-state = "off"; ++ }; ++ led@24 { ++ label = "green:wifi2"; ++ gpios = <&tlmm 24 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:2g"; ++ default-state = "off"; ++ }; ++ led@18 { ++ label = "green:wan"; ++ gpios = <&tlmm 18 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:eth"; ++ default-state = "off"; ++ }; ++ led_power: led@16 { ++ label = "green:power"; ++ gpios = <&tlmm 16 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:power"; ++ default-state = "off"; ++ }; ++ }; ++ ++ gpio-watchdog { ++ compatible = "linux,wdt-gpio"; ++ gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; ++ hw_algo = "toggle"; ++ hw_margin_ms = <5000>; ++ always-running; ++ }; ++}; ++ ++&blsp1_uart3 { ++ pinctrl-0 = <&uart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&spi_0 { ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ status = "ok"; ++ ++ m25p80@0 { ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0>; ++ compatible = "n25q128a11"; ++ linux,modalias = "m25p80", "n25q128a11"; ++ spi-max-frequency = <50000000>; ++ use-default-sizes; ++ }; ++}; ++ ++&blsp1_uart2 { ++ pinctrl-0 = <&uart2_pins>; ++ pinctrl-names = "default"; ++ dmas = <&blsp_dma 2>, ++ <&blsp_dma 3>; ++ dma-names = "tx", "rx"; ++ status = "ok"; ++}; ++&qpic_bam { ++ status = "ok"; ++}; ++ ++&nand { ++ pinctrl-0 = <&qpic_pins>; ++ pinctrl-names = "default"; ++ status = "disabled"; ++}; ++ ++&ssphy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_0 { ++ status = "ok"; ++}; ++ ++&usb3 { ++ status = "ok"; ++}; ++ ++&nss_crypto { ++ status = "ok"; ++}; ++ ++&cpu0_opp_table { ++ compatible = "operating-points-v2"; ++ opp-shared; ++ opp03 { ++ opp-hz = /bits/ 64 <1200000000>; ++ opp-microvolt = <3>; ++ clock-latency-ns = <200000>; ++ }; ++ /delete-node/ opp04; ++ /delete-node/ opp05; ++ /delete-node/ opp06; ++}; ++ ++/* TZAPP is enabled in default memory profile only */ ++#if !defined(__IPQ_MEM_PROFILE_256_MB__) && !defined(__IPQ_MEM_PROFILE_512_MB__) ++&qseecom { ++ mem-start = <0x49B00000>; ++ mem-size = <0x600000>; ++ status = "ok"; ++}; ++#endif +Index: linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/qcom-ipq6018-cig-wf188n.dts +=================================================================== +--- /dev/null ++++ linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/qcom-ipq6018-cig-wf188n.dts +@@ -0,0 +1,423 @@ ++/dts-v1/; ++/* ++ * Copyright (c) 2019, The Linux Foundation. 
All rights reserved. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ++ */ ++ ++#include "qcom-ipq6018.dtsi" ++#include "qcom-ipq6018-rpm-regulator.dtsi" ++#include "qcom-ipq6018-cpr-regulator.dtsi" ++#include "qcom-ipq6018-cp-cpu.dtsi" ++#include ++#include ++ ++/ { ++ #address-cells = <0x2>; ++ #size-cells = <0x2>; ++ model = "Cigtech WF-188n"; ++ compatible = "cig,wf188n", "qcom,ipq6018-cp03", "qcom,ipq6018"; ++ interrupt-parent = <&intc>; ++ ++ aliases { ++ serial0 = &blsp1_uart3; ++ serial1 = &blsp1_uart2; ++ ++ /* ++ * Aliases as required by u-boot ++ * to patch MAC addresses ++ */ ++ ethernet0 = "/soc/dp1"; ++ ethernet1 = "/soc/dp2"; ++ ++ led-boot = &led_power; ++ led-failsafe = &led_power; ++ led-running = &led_power; ++ led-upgrade = &led_power; ++ }; ++ ++ chosen { ++ bootargs = "console=ttyMSM0,115200,n8 rw init=/init"; ++ bootargs-append = " swiotlb=1 coherent_pool=2M"; ++ }; ++ ++ /* ++ * +=========+==============+========================+ ++ * | | | | ++ * | Region | Start Offset | Size | ++ * | | | | ++ * +--------+--------------+-------------------------+ ++ * | | | | ++ * | | | | ++ * | | | | ++ * | | | | ++ * | Linux | 0x41000000 | 139MB | ++ * | | | | ++ * | | | | ++ * | | | | ++ * +--------+--------------+-------------------------+ ++ * | TZ App | 0x49B00000 | 6MB | ++ * +--------+--------------+-------------------------+ ++ * ++ * From the available 145 MB for Linux in the first 256 MB, ++ * we are reserving 6 MB for TZAPP. ++ * ++ * Refer arch/arm64/boot/dts/qcom/qcom-ipq6018-memory.dtsi ++ * for memory layout. 
++ */ ++ ++/* TZAPP is enabled only in default memory profile */ ++#if !defined(__IPQ_MEM_PROFILE_256_MB__) && !defined(__IPQ_MEM_PROFILE_512_MB__) ++ reserved-memory { ++ tzapp:tzapp@49B00000 { /* TZAPPS */ ++ no-map; ++ reg = <0x0 0x49B00000 0x0 0x00600000>; ++ }; ++ }; ++#endif ++}; ++ ++&tlmm { ++ uart_pins: uart_pins { ++ mux { ++ pins = "gpio44", "gpio45"; ++ function = "blsp2_uart"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ spi_0_pins: spi_0_pins { ++ mux { ++ pins = "gpio38", "gpio39", "gpio40", "gpio41"; ++ function = "blsp0_spi"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ qpic_pins: qpic_pins { ++ data_0 { ++ pins = "gpio15"; ++ function = "qpic_pad0"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_1 { ++ pins = "gpio12"; ++ function = "qpic_pad1"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_2 { ++ pins = "gpio13"; ++ function = "qpic_pad2"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_3 { ++ pins = "gpio14"; ++ function = "qpic_pad3"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_4 { ++ pins = "gpio5"; ++ function = "qpic_pad4"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_5 { ++ pins = "gpio6"; ++ function = "qpic_pad5"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_6 { ++ pins = "gpio7"; ++ function = "qpic_pad6"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_7 { ++ pins = "gpio8"; ++ function = "qpic_pad7"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ qpic_pad { ++ pins = "gpio1", "gpio3", "gpio4", ++ "gpio10", "gpio11", "gpio17"; ++ function = "qpic_pad"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ button_pins: button_pins { ++ wps_button { ++ pins = "gpio22"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ mdio_pins: mdio_pinmux { ++ mux_0 { ++ pins = "gpio64"; ++ function = "mdc"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_1 { ++ pins = "gpio65"; ++ function = "mdio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_2 { ++ pins = "gpio75"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ }; ++ ++ leds_pins: leds_pins { ++ led_5g { ++ pins = "gpio25"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_2g { ++ pins = "gpio24"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_eth { ++ pins = "gpio18"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_pwr { ++ pins = "gpio16"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ uart2_pins: uart2_pins { ++ mux { ++ pins = "gpio57", "gpio58"; ++ function = "blsp4_uart"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++}; ++ ++&soc { ++ mdio@90000 { ++ pinctrl-0 = <&mdio_pins>; ++ pinctrl-names = "default"; ++ phy-reset-gpio = <&tlmm 75 0>; ++ status = "ok"; ++ phy0: ethernet-phy@0 { ++ reg = <3>; ++ }; ++ phy1: ethernet-phy@1 { ++ reg = <4>; ++ }; ++ }; ++ ++ ++ ess-switch@3a000000 { ++ switch_cpu_bmp = <0x1>; /* cpu port bitmap */ ++ switch_lan_bmp = <0x08>; /* lan port bitmap */ ++ switch_wan_bmp = <0x10>; /* wan port bitmap */ ++ switch_inner_bmp = <0xc0>; /*inner port bitmap*/ ++ switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ ++ switch_mac_mode1 = <0xff>; /* mac mode for uniphy instance1*/ ++ switch_mac_mode2 = <0xff>; /* mac mode for uniphy instance2*/ ++ qcom,port_phyinfo { ++ port@3 { ++ port_id = <3>; ++ phy_address = <4>; ++ }; ++ port@4 { ++ port_id = <4>; ++ phy_address = <3>; ++ }; 
++ }; ++ }; ++ ++ dp1 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <3>; ++ reg = <0x3a001400 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <4>; ++ phy-mode = "sgmii"; ++ }; ++ ++ dp2 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <4>; ++ reg = <0x3a001600 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <3>; ++ phy-mode = "sgmii"; ++ ++ }; ++ ++ gpio_keys { ++ compatible = "gpio-keys"; ++ pinctrl-0 = <&button_pins>; ++ pinctrl-names = "default"; ++ ++ wps { ++ label = "reset"; ++ linux,code = ; ++ gpios = <&tlmm 22 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ debounce-interval = <60>; ++ }; ++ }; ++ ++ leds { ++ compatible = "gpio-leds"; ++ pinctrl-0 = <&leds_pins>; ++ pinctrl-names = "default"; ++ ++ led@25 { ++ label = "green:wifi5"; ++ gpios = <&tlmm 25 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:5g"; ++ default-state = "off"; ++ }; ++ led@24 { ++ label = "green:wifi2"; ++ gpios = <&tlmm 24 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:2g"; ++ default-state = "off"; ++ }; ++ led@18 { ++ label = "green:wan"; ++ gpios = <&tlmm 18 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:eth"; ++ default-state = "off"; ++ }; ++ led_power: led@16 { ++ label = "green:power"; ++ gpios = <&tlmm 16 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:power"; ++ default-state = "on"; ++ }; ++ }; ++ ++ gpio-watchdog { ++ compatible = "linux,wdt-gpio"; ++ gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; ++ hw_algo = "toggle"; ++ hw_margin_ms = <5000>; ++ always-running; ++ }; ++}; ++ ++&blsp1_uart3 { ++ pinctrl-0 = <&uart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&spi_0 { ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ status = "ok"; ++ ++ m25p80@0 { ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0>; ++ compatible = "n25q128a11"; ++ linux,modalias = "m25p80", "n25q128a11"; ++ spi-max-frequency = <50000000>; ++ use-default-sizes; ++ }; ++}; ++ ++&blsp1_uart2 { ++ pinctrl-0 = <&uart2_pins>; ++ pinctrl-names = "default"; ++ dmas = <&blsp_dma 2>, ++ <&blsp_dma 3>; ++ dma-names = "tx", "rx"; ++ status = "ok"; ++}; ++&qpic_bam { ++ status = "ok"; ++}; ++ ++&nand { ++ pinctrl-0 = <&qpic_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&ssphy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_0 { ++ status = "ok"; ++}; ++ ++&usb3 { ++ status = "ok"; ++}; ++ ++&nss_crypto { ++ status = "ok"; ++}; ++ ++&cpu0_opp_table { ++ compatible = "operating-points-v2"; ++ opp-shared; ++ opp03 { ++ opp-hz = /bits/ 64 <1200000000>; ++ opp-microvolt = <3>; ++ clock-latency-ns = <200000>; ++ }; ++ /delete-node/ opp04; ++ /delete-node/ opp05; ++ /delete-node/ opp06; ++}; ++ ++/* TZAPP is enabled in default memory profile only */ ++#if !defined(__IPQ_MEM_PROFILE_256_MB__) && !defined(__IPQ_MEM_PROFILE_512_MB__) ++&qseecom { ++ mem-start = <0x49B00000>; ++ mem-size = <0x600000>; ++ status = "ok"; ++}; ++#endif diff --git a/target/linux/ipq807x/patches/103-sercomm-wallaby.patch b/target/linux/ipq807x/patches/103-sercomm-wallaby.patch new file mode 100644 index 0000000000..c6d12fe963 --- /dev/null +++ b/target/linux/ipq807x/patches/103-sercomm-wallaby.patch @@ -0,0 +1,816 @@ +Index: linux-4.4.60-qsdk/arch/arm64/boot/dts/qcom/Makefile +=================================================================== +--- 
linux-4.4.60-qsdk.orig/arch/arm64/boot/dts/qcom/Makefile ++++ linux-4.4.60-qsdk/arch/arm64/boot/dts/qcom/Makefile +@@ -30,6 +30,7 @@ dtb-$(CONFIG_ARCH_QCOM) += \ + qcom-ipq807x-ac04.dtb \ + qcom-ipq807x-oak02.dtb \ + qcom-ipq807x-oak03.dtb \ ++ qcom-ipq807x-sercomm-wallaby.dtb \ + qcom-ipq807x-db.hk01.dtb \ + qcom-ipq807x-db.hk02.dtb + +Index: linux-4.4.60-qsdk/arch/arm64/boot/dts/qcom/qcom-ipq807x-sercomm-wallaby.dts +=================================================================== +--- /dev/null ++++ linux-4.4.60-qsdk/arch/arm64/boot/dts/qcom/qcom-ipq807x-sercomm-wallaby.dts +@@ -0,0 +1,799 @@ ++/dts-v1/; ++/* ++ * Copyright (c) 2017-2019, The Linux Foundation. All rights reserved. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ++ */ ++#include "qcom-ipq807x-soc.dtsi" ++#include "qcom-ipq807x-hk-cpu.dtsi" ++ ++/ { ++ #address-cells = <0x2>; ++ #size-cells = <0x2>; ++ model = "Sercomm Wallaby"; ++ compatible = "sercomm,wallaby", "qcom,ipq807x-hk09", "qcom,ipq807x"; ++ qcom,msm-id = <0x156 0x0>; ++ interrupt-parent = <&intc>; ++ qcom,board-id = <0x8 0x0>; ++ qcom,pmic-id = <0x0 0x0 0x0 0x0>; ++ ++ aliases { ++ /* ++ * Aliases as required by u-boot ++ * to patch MAC addresses ++ */ ++ ethernet3 = "/soc/dp4"; ++ ethernet4 = "/soc/dp5"; ++ ++ led-boot = &led_green; ++ led-failsafe = &led_green; ++ led-running = &led_green; ++ led-upgrade = &led_blue; ++ }; ++}; ++ ++&tlmm { ++ pinctrl-0 = <&btcoex_pins>; ++ pinctrl-names = "default"; ++ ++ btcoex_pins: btcoex_pins { ++ mux_0 { ++ pins = "gpio64"; ++ function = "pta1_1"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ mux_1 { ++ pins = "gpio65"; ++ function = "pta1_2"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ mux_2 { ++ pins = "gpio66"; ++ function = "pta1_0"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ }; ++ ++ mdio_pins: mdio_pinmux { ++ mux_0 { ++ pins = "gpio68"; ++ function = "mdc"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_1 { ++ pins = "gpio69"; ++ function = "mdio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_2 { ++ pins = "gpio25"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ mux_3 { ++ pins = "gpio44"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ }; ++ ++ uart_pins: uart_pins { ++ mux { ++ pins = "gpio23", "gpio24"; ++ function = "blsp4_uart1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ spi_0_pins: spi_0_pins { ++ mux { ++ pins = "gpio38", "gpio39", "gpio40", "gpio41"; ++ function = "blsp0_spi"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ spi_3_pins: spi_3_pins { ++ mux { ++ pins = "gpio50", "gpio52", "gpio53"; ++ function = "blsp3_spi"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ spi_cs { ++ pins = "gpio22"; ++ function = "blsp3_spi2"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ quartz_interrupt { ++ pins = "gpio47"; ++ function = 
"gpio"; ++ input; ++ bias-disable; ++ }; ++ quartz_reset { ++ pins = "gpio21"; ++ function = "gpio"; ++ output-low; ++ bias-disable; ++ }; ++ }; ++ ++ qpic_pins: qpic_pins { ++ data_0 { ++ pins = "gpio15"; ++ function = "qpic_pad0"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_1 { ++ pins = "gpio12"; ++ function = "qpic_pad1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_2 { ++ pins = "gpio13"; ++ function = "qpic_pad2"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_3 { ++ pins = "gpio14"; ++ function = "qpic_pad3"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_4 { ++ pins = "gpio5"; ++ function = "qpic_pad4"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_5 { ++ pins = "gpio6"; ++ function = "qpic_pad5"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_6 { ++ pins = "gpio7"; ++ function = "qpic_pad6"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_7 { ++ pins = "gpio8"; ++ function = "qpic_pad7"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ qpic_pad { ++ pins = "gpio1", "gpio3", "gpio4", ++ "gpio10", "gpio11", "gpio17"; ++ function = "qpic_pad"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ hsuart_pins: hsuart_pins { ++ mux { ++ pins = "gpio46", "gpio47", "gpio48", "gpio49"; ++ function = "blsp2_uart"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ button_pins: button_pins { ++ reset_button { ++ pins = "gpio33"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ led_pins: led_pins { ++ led_wan { ++ pins = "gpio31"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ ++ led_lan { ++ pins = "gpio32"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ ++ led_blue { ++ pins = "gpio60"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ ++ led_green { ++ pins = "gpio61"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ ++ led_red { ++ pins = "gpio62"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ usb_mux_sel_pins: usb_mux_pins { ++ mux { ++ pins = "gpio27"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ pcie0_pins: pcie_pins { ++ pcie0_rst { ++ pins = "gpio58"; ++ function = "pcie0_rst"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ pcie0_wake { ++ pins = "gpio59"; ++ function = "pcie0_wake"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++}; ++ ++&soc { ++ gpio_keys { ++ compatible = "gpio-keys"; ++ pinctrl-0 = <&button_pins>; ++ pinctrl-names = "default"; ++ ++ button@1 { ++ label = "reset"; ++ linux,code = ; ++ gpios = <&tlmm 33 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ debounce-interval = <60>; ++ }; ++ }; ++ ++ mdio: mdio@90000 { ++ pinctrl-0 = <&mdio_pins>; ++ pinctrl-names = "default"; ++ phy-reset-gpio = <&tlmm 37 0 &tlmm 25 1 &tlmm 44 1>; ++ compatible = "qcom,ipq40xx-mdio", "qcom,qca-mdio"; ++ phy3: ethernet-phy@3 { ++ reg = <3>; ++ }; ++ phy4: ethernet-phy@4 { ++ reg = <4>; ++ }; ++ }; ++ ++ ess-switch@3a000000 { ++ switch_cpu_bmp = <0x1>; /* cpu port bitmap */ ++ switch_lan_bmp = <0x10>; /* lan port bitmap */ ++ switch_wan_bmp = <0x20>; /* wan port bitmap */ ++ switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ ++ switch_mac_mode1 = <0xff>; /* mac mode for uniphy instance1*/ ++ switch_mac_mode2 = <0xff>; /* mac mode for uniphy instance2*/ ++ bm_tick_mode = <0>; /* bm tick mode */ ++ tm_tick_mode = <0>; /* tm tick mode */ ++ qcom,port_phyinfo { ++ port@0 
{ ++ port_id = <1>; ++ phy_address = <0>; ++ }; ++ port@1 { ++ port_id = <2>; ++ phy_address = <1>; ++ }; ++ port@2 { ++ port_id = <3>; ++ phy_address = <2>; ++ }; ++ port@3 { ++ port_id = <4>; ++ phy_address = <3>; ++ }; ++ port@4 { ++ port_id = <5>; ++ phy_address = <4>; ++/* port_mac_sel = "QGMAC_PORT"; */ ++ }; ++/* port@5 { ++ port_id = <6>; ++ phy_address = <28>; ++ port_mac_sel = "QGMAC_PORT"; ++ };*/ ++ }; ++ port_scheduler_resource { ++ port@0 { ++ port_id = <0>; ++ ucast_queue = <0 143>; ++ mcast_queue = <256 271>; ++ l0sp = <0 35>; ++ l0cdrr = <0 47>; ++ l0edrr = <0 47>; ++ l1cdrr = <0 7>; ++ l1edrr = <0 7>; ++ }; ++ port@1 { ++ port_id = <1>; ++ ucast_queue = <144 159>; ++ mcast_queue = <272 275>; ++ l0sp = <36 39>; ++ l0cdrr = <48 63>; ++ l0edrr = <48 63>; ++ l1cdrr = <8 11>; ++ l1edrr = <8 11>; ++ }; ++ port@2 { ++ port_id = <2>; ++ ucast_queue = <160 175>; ++ mcast_queue = <276 279>; ++ l0sp = <40 43>; ++ l0cdrr = <64 79>; ++ l0edrr = <64 79>; ++ l1cdrr = <12 15>; ++ l1edrr = <12 15>; ++ }; ++ port@3 { ++ port_id = <3>; ++ ucast_queue = <176 191>; ++ mcast_queue = <280 283>; ++ l0sp = <44 47>; ++ l0cdrr = <80 95>; ++ l0edrr = <80 95>; ++ l1cdrr = <16 19>; ++ l1edrr = <16 19>; ++ }; ++ port@4 { ++ port_id = <4>; ++ ucast_queue = <192 207>; ++ mcast_queue = <284 287>; ++ l0sp = <48 51>; ++ l0cdrr = <96 111>; ++ l0edrr = <96 111>; ++ l1cdrr = <20 23>; ++ l1edrr = <20 23>; ++ }; ++ port@5 { ++ port_id = <5>; ++ ucast_queue = <208 223>; ++ mcast_queue = <288 291>; ++ l0sp = <52 55>; ++ l0cdrr = <112 127>; ++ l0edrr = <112 127>; ++ l1cdrr = <24 27>; ++ l1edrr = <24 27>; ++ }; ++ port@6 { ++ port_id = <6>; ++ ucast_queue = <224 239>; ++ mcast_queue = <292 295>; ++ l0sp = <56 59>; ++ l0cdrr = <128 143>; ++ l0edrr = <128 143>; ++ l1cdrr = <28 31>; ++ l1edrr = <28 31>; ++ }; ++ port@7 { ++ port_id = <7>; ++ ucast_queue = <240 255>; ++ mcast_queue = <296 299>; ++ l0sp = <60 63>; ++ l0cdrr = <144 159>; ++ l0edrr = <144 159>; ++ l1cdrr = <32 35>; ++ l1edrr = <32 35>; ++ }; ++ }; ++ port_scheduler_config { ++ port@0 { ++ port_id = <0>; ++ l1scheduler { ++ group@0 { ++ sp = <0 1>; /*L0 SPs*/ ++ /*cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ /*unicast queues*/ ++ ucast_queue = <0 4 8>; ++ /*multicast queues*/ ++ mcast_queue = <256 260>; ++ /*sp cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0 0>; ++ }; ++ group@1 { ++ ucast_queue = <1 5 9>; ++ mcast_queue = <257 261>; ++ cfg = <0 1 1 1 1>; ++ }; ++ group@2 { ++ ucast_queue = <2 6 10>; ++ mcast_queue = <258 262>; ++ cfg = <0 2 2 2 2>; ++ }; ++ group@3 { ++ ucast_queue = <3 7 11>; ++ mcast_queue = <259 263>; ++ cfg = <0 3 3 3 3>; ++ }; ++ }; ++ }; ++ port@1 { ++ port_id = <1>; ++ l1scheduler { ++ group@0 { ++ sp = <36>; ++ cfg = <0 8 0 8>; ++ }; ++ group@1 { ++ sp = <37>; ++ cfg = <1 9 1 9>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <144>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <272>; ++ mcast_loop_pri = <4>; ++ cfg = <36 0 48 0 48>; ++ }; ++ }; ++ }; ++ port@2 { ++ port_id = <2>; ++ l1scheduler { ++ group@0 { ++ sp = <40>; ++ cfg = <0 12 0 12>; ++ }; ++ group@1 { ++ sp = <41>; ++ cfg = <1 13 1 13>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <160>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <276>; ++ mcast_loop_pri = <4>; ++ cfg = <40 0 64 0 64>; ++ }; ++ }; ++ }; ++ port@3 { ++ port_id = <3>; ++ l1scheduler { ++ group@0 { ++ sp = <44>; ++ cfg = <0 16 0 16>; ++ }; ++ group@1 { ++ sp = <45>; ++ cfg = <1 17 1 17>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = 
<176>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <280>; ++ mcast_loop_pri = <4>; ++ cfg = <44 0 80 0 80>; ++ }; ++ }; ++ }; ++ port@4 { ++ port_id = <4>; ++ l1scheduler { ++ group@0 { ++ sp = <48>; ++ cfg = <0 20 0 20>; ++ }; ++ group@1 { ++ sp = <49>; ++ cfg = <1 21 1 21>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <192>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <284>; ++ mcast_loop_pri = <4>; ++ cfg = <48 0 96 0 96>; ++ }; ++ }; ++ }; ++ port@5 { ++ port_id = <5>; ++ l1scheduler { ++ group@0 { ++ sp = <52>; ++ cfg = <0 24 0 24>; ++ }; ++ group@1 { ++ sp = <53>; ++ cfg = <1 25 1 25>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <208>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <288>; ++ mcast_loop_pri = <4>; ++ cfg = <52 0 112 0 112>; ++ }; ++ }; ++ }; ++ port@6 { ++ port_id = <6>; ++ l1scheduler { ++ group@0 { ++ sp = <56>; ++ cfg = <0 28 0 28>; ++ }; ++ group@1 { ++ sp = <57>; ++ cfg = <1 29 1 29>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <224>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <292>; ++ mcast_loop_pri = <4>; ++ cfg = <56 0 128 0 128>; ++ }; ++ }; ++ }; ++ port@7 { ++ port_id = <7>; ++ l1scheduler { ++ group@0 { ++ sp = <60>; ++ cfg = <0 32 0 32>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <240>; ++ mcast_queue = <296>; ++ cfg = <60 0 144 0 144>; ++ }; ++ }; ++ }; ++ }; ++ }; ++ ++ dp4 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <4>; ++ reg = <0x3a001600 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <3>; ++ phy-mode = "sgmii"; ++ }; ++ ++ dp5 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <5>; ++ reg = <0x3a003000 0x3fff>; ++ qcom,mactype = <1>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <4>; ++ phy-mode = "sgmii"; ++ }; ++ ++ leds { ++ compatible = "gpio-leds"; ++ pinctrl-0 = <&led_pins>; ++ pinctrl-names = "default"; ++ ++ led_wan { ++ label = "wallaby:green:wan"; ++ gpio = <&tlmm 31 GPIO_ACTIVE_HIGH>; ++ default-state = "off"; ++ }; ++ ++ led_lan { ++ label = "wallaby:green:lan"; ++ gpio = <&tlmm 32 GPIO_ACTIVE_HIGH>; ++ default-state = "off"; ++ }; ++ ++ led_blue: led_blue { ++ label = "wallaby:blue:status"; ++ gpio = <&tlmm 60 GPIO_ACTIVE_HIGH>; ++ default-state = "off"; ++ }; ++ ++ led_green: led_green { ++ label = "wallaby:green:status"; ++ gpio = <&tlmm 61 GPIO_ACTIVE_HIGH>; ++ default-state = "off"; ++ }; ++ ++ led_red { ++ label = "wallaby:red:status"; ++ gpio = <&tlmm 62 GPIO_ACTIVE_HIGH>; ++ default-state = "off"; ++ }; ++ ++ }; ++ nss-macsec0 { ++ compatible = "qcom,nss-macsec"; ++ phy_addr = <0x18>; ++ phy_access_mode = <0>; ++ mdiobus = <&mdio>; ++ }; ++ nss-macsec1 { ++ compatible = "qcom,nss-macsec"; ++ phy_addr = <0x1c>; ++ phy_access_mode = <0>; ++ mdiobus = <&mdio>; ++ }; ++}; ++ ++&serial_blsp4 { ++ pinctrl-0 = <&uart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&spi_0 { /* BLSP1 QUP1 */ ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ status = "ok"; ++ ++ m25p80@0 { ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0>; ++ compatible = "n25q128a11"; ++ linux,modalias = "m25p80", "n25q128a11"; ++ spi-max-frequency = <50000000>; ++ use-default-sizes; ++ }; ++}; ++ ++&spi_4 { /* BLSP1 QUP3 */ ++ pinctrl-0 = <&spi_3_pins>; ++ pinctrl-names = "default"; ++ cs-select = <2>; ++ quartz-reset-gpio = <&tlmm 21 1>; ++ status = "ok"; ++ spidev3: spi@3 { ++ compatible = 
"qca,spidev"; ++ reg = <0>; ++ spi-max-frequency = <24000000>; ++ }; ++}; ++ ++&serial_blsp2 { ++ status = "disabled"; ++}; ++ ++&msm_imem { ++ status = "disabled"; ++}; ++ ++&ssphy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_0 { ++ status = "ok"; ++}; ++ ++&ssphy_1 { ++ status = "ok"; ++}; ++ ++&qusb_phy_1 { ++ status = "ok"; ++}; ++ ++&usb3_0 { ++ status = "ok"; ++}; ++ ++&usb3_1 { ++ status = "ok"; ++}; ++ ++&cryptobam { ++ status = "ok"; ++}; ++ ++&crypto { ++ status = "ok"; ++}; ++ ++&i2c_0 { ++ status = "disabled"; ++}; ++ ++&i2c_1 { ++ status = "disabled"; ++}; ++ ++&qpic_bam { ++ status = "ok"; ++}; ++ ++&nand { ++ pinctrl-0 = <&qpic_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&qpic_lcd { ++ status = "disabled"; ++}; ++ ++&qpic_lcd_panel { ++ status = "disabled"; ++}; ++ ++&ledc { ++ status = "disabled"; ++}; ++ ++&pcie0 { ++ status = "disabled"; ++}; ++ ++&pcie1 { ++ status = "disabled"; ++}; ++ diff --git a/target/linux/ipq807x/patches/104-wf194c.patch b/target/linux/ipq807x/patches/104-wf194c.patch new file mode 100644 index 0000000000..05b1b18b6c --- /dev/null +++ b/target/linux/ipq807x/patches/104-wf194c.patch @@ -0,0 +1,816 @@ +Index: linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/Makefile +=================================================================== +--- linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1.orig/arch/arm64/boot/dts/qcom/Makefile ++++ linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/Makefile +@@ -55,7 +55,8 @@ dtb-$(CONFIG_ARCH_QCOM) += \ + qcom-ipq807x-oak03.dtb \ + qcom-ipq807x-sercomm-wallaby.dtb \ + qcom-ipq807x-db.hk01.dtb \ +- qcom-ipq807x-db.hk02.dtb ++ qcom-ipq807x-db.hk02.dtb \ ++ qcom-ipq807x-wf194c.dtb + endif + + always := $(dtb-y) +Index: linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/qcom-ipq807x-wf194c.dts +=================================================================== +--- /dev/null ++++ linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/arch/arm64/boot/dts/qcom/qcom-ipq807x-wf194c.dts +@@ -0,0 +1,751 @@ ++/dts-v1/; ++/* ++ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
++ */ ++#include "qcom-ipq807x-soc.dtsi" ++#include "qcom-ipq807x-audio.dtsi" ++#include "qcom-ipq807x-hk-cpu.dtsi" ++ ++/ { ++ #address-cells = <0x2>; ++ #size-cells = <0x2>; ++ model = "CIG WF194C"; ++ compatible = "cig,wf194c", "qcom,ipq807x"; ++ qcom,msm-id = <0x143 0x0>, <0x158 0x0>, <0x186 0x0>, <0x188 0x0>; ++ interrupt-parent = <&intc>; ++ qcom,board-id = <0x8 0x0>; ++ qcom,pmic-id = <0x0 0x0 0x0 0x0>; ++ ++ aliases { ++ sdhc1 = &sdhc_1; /* SDC1 eMMC slot */ ++ sdhc2 = &sdhc_2; /* SDC2 SD slot */ ++ /* ++ * Aliases as required by u-boot ++ * to patch MAC addresses ++ */ ++ ethernet0 = "/soc/dp1"; ++ ethernet1 = "/soc/dp2"; ++ }; ++ ++ chosen { ++ bootargs = "console=ttyMSM0,115200,n8 root=/dev/ram0 rw init=/init"; ++ #ifdef __IPQ_MEM_PROFILE_256_MB__ ++ bootargs-append = " swiotlb=1"; ++ #else ++ bootargs-append = " swiotlb=1 coherent_pool=2M"; ++ #endif ++ }; ++}; ++ ++&tlmm { ++ pinctrl-0 = <&btcoex_pins>; ++ pinctrl-names = "default"; ++ ++ btcoex_pins: btcoex_pins { ++ mux_0 { ++ pins = "gpio34"; ++ function = "gpio"; ++ drive-strength = <6>; ++ bias-pull-up; ++ output-high; ++ }; ++ mux_1 { ++ pins = "gpio62"; ++ function = "gpio"; ++ drive-strength = <6>; ++ bias-pull-up; ++ output-high; ++ }; ++ }; ++ ++ mdio_pins: mdio_pinmux { ++ mux_0 { ++ pins = "gpio68"; ++ function = "mdc"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_1 { ++ pins = "gpio69"; ++ function = "mdio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ uart_pins: uart_pins { ++ mux { ++ pins = "gpio23", "gpio24"; ++ function = "blsp4_uart1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ i2c_0_pins: i2c_0_pinmux { ++ mux { ++ pins = "gpio42", "gpio43"; ++ function = "blsp1_i2c"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ spi_0_pins: spi_0_pins { ++ mux { ++ pins = "gpio38", "gpio39", "gpio40", "gpio41"; ++ function = "blsp0_spi"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ qpic_pins: qpic_pins { ++ data_0 { ++ pins = "gpio15"; ++ function = "qpic_pad0"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_1 { ++ pins = "gpio12"; ++ function = "qpic_pad1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_2 { ++ pins = "gpio13"; ++ function = "qpic_pad2"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_3 { ++ pins = "gpio14"; ++ function = "qpic_pad3"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_4 { ++ pins = "gpio5"; ++ function = "qpic_pad4"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_5 { ++ pins = "gpio6"; ++ function = "qpic_pad5"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_6 { ++ pins = "gpio7"; ++ function = "qpic_pad6"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_7 { ++ pins = "gpio8"; ++ function = "qpic_pad7"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_8 { ++ pins = "gpio16"; ++ function = "qpic_pad8"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ qpic_pad { ++ pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4", ++ "gpio9", "gpio10", "gpio11", "gpio17"; ++ function = "qpic_pad"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ sd_pins: sd_pins { ++ mux { ++ pins = "gpio63"; ++ function = "sd_card"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ hsuart_pins: hsuart_pins { ++ mux { ++ pins = "gpio48", "gpio49"; ++ function = "blsp2_uart"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ leds_pins: leds_pinmux { ++ led1_r { ++ pins = "gpio54"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ 
led1_g { ++ pins = "gpio55"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led2_r { ++ pins = "gpio56"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led2_g { ++ pins = "gpio64"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ ++ }; ++ ++ button_pins: button_pins { ++ wps_button { ++ pins = "gpio67"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ uniphy_pins: uniphy_pinmux { ++ mux_2 { ++ pins = "gpio37"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_3 { ++ pins = "gpio44"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ pwm_pins: pwm_pinmux { ++ mux_1 { ++ pins = "gpio25"; ++ function = "pwm02"; ++ drive-strength = <8>; ++ }; ++ }; ++}; ++ ++&soc { ++ pwm { ++ pinctrl-0 = <&pwm_pins>; ++ pinctrl-names = "default"; ++ used-pwm-indices = <1>, <0>, <0>, <0>; ++ status = "disabled"; ++ }; ++ ++ gpio_keys { ++ compatible = "gpio-keys"; ++ pinctrl-0 = <&button_pins>; ++ pinctrl-names = "default"; ++ ++ button@1 { ++ label = "reset"; ++ linux,code = ; ++ gpios = <&tlmm 67 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ debounce-interval = <60>; ++ }; ++ }; ++ ++ leds { ++ compatible = "gpio-leds"; ++ pinctrl-0 = <&leds_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++ ++ led@54 { ++ label = "wf194c:red:lan"; ++ gpios = <&tlmm 54 GPIO_ACTIVE_HIGH>; ++ default-state = "off"; ++ }; ++ led@55 { ++ label = "wf194c:green:lan"; ++ gpios = <&tlmm 55 GPIO_ACTIVE_HIGH>; ++ default-state = "on"; ++ }; ++ led@56 { ++ label = "wf194c:red:wan"; ++ gpios = <&tlmm 56 GPIO_ACTIVE_HIGH>; ++ default-state = "off"; ++ }; ++ led@64 { ++ label = "wf194c:green:wan"; ++ gpios = <&tlmm 64 GPIO_ACTIVE_HIGH>; ++ default-state = "on"; ++ }; ++ }; ++ mdio@90000 { ++ pinctrl-0 = <&mdio_pins>; ++ pinctrl-names = "default"; ++ phy-reset-gpio = <&tlmm 37 0 &tlmm 44 0>; ++ phy0: ethernet-phy@0 { ++ reg = <0xf>; /*<0>*/ ++ }; ++ phy1: ethernet-phy@1 { ++ reg = <0xf>; ++ }; ++ phy2: ethernet-phy@2 { ++ reg = <0xf>; ++ }; ++ phy3: ethernet-phy@3 { ++ reg = <3>; ++ }; ++ phy4: ethernet-phy@4 { ++ reg = <4>; ++ }; ++ phy5: ethernet-phy@5 { ++ compatible ="ethernet-phy-ieee802.3-c45"; ++ reg = <0>; ++ }; ++ }; ++ ++ ess-switch@3a000000 { ++ pinctrl-0 = <&uniphy_pins>; ++ pinctrl-names = "default"; ++ switch_cpu_bmp = <0x1>; /* cpu port bitmap */ ++ switch_lan_bmp = <0x30>; /*..0x3e lan port bitmap */ ++ switch_wan_bmp = <0x40>; /* wan port bitmap */ ++ switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ ++ switch_mac_mode1 = <0xff>; /* mac mode for uniphy instance1*/ ++ switch_mac_mode2 = <0xd>; /* mac mode for uniphy instance2*/ ++ bm_tick_mode = <0>; /* bm tick mode */ ++ tm_tick_mode = <0>; /* tm tick mode */ ++ port_scheduler_resource { ++ port@0 { ++ port_id = <0>; ++ ucast_queue = <0 143>; ++ mcast_queue = <256 271>; ++ l0sp = <0 35>; ++ l0cdrr = <0 47>; ++ l0edrr = <0 47>; ++ l1cdrr = <0 7>; ++ l1edrr = <0 7>; ++ }; ++ port@1 { ++ port_id = <1>; ++ ucast_queue = <144 159>; ++ mcast_queue = <272 275>; ++ l0sp = <36 39>; ++ l0cdrr = <48 63>; ++ l0edrr = <48 63>; ++ l1cdrr = <8 11>; ++ l1edrr = <8 11>; ++ }; ++ port@2 { ++ port_id = <2>; ++ ucast_queue = <160 175>; ++ mcast_queue = <276 279>; ++ l0sp = <40 43>; ++ l0cdrr = <64 79>; ++ l0edrr = <64 79>; ++ l1cdrr = <12 15>; ++ l1edrr = <12 15>; ++ }; ++ port@3 { ++ port_id = <3>; ++ ucast_queue = <176 191>; ++ mcast_queue = <280 283>; ++ l0sp = <44 47>; ++ l0cdrr = <80 95>; ++ l0edrr 
= <80 95>; ++ l1cdrr = <16 19>; ++ l1edrr = <16 19>; ++ }; ++ port@4 { ++ port_id = <4>; ++ ucast_queue = <192 207>; ++ mcast_queue = <284 287>; ++ l0sp = <48 51>; ++ l0cdrr = <96 111>; ++ l0edrr = <96 111>; ++ l1cdrr = <20 23>; ++ l1edrr = <20 23>; ++ }; ++ port@5 { ++ port_id = <5>; ++ ucast_queue = <208 223>; ++ mcast_queue = <288 291>; ++ l0sp = <52 55>; ++ l0cdrr = <112 127>; ++ l0edrr = <112 127>; ++ l1cdrr = <24 27>; ++ l1edrr = <24 27>; ++ }; ++ port@6 { ++ port_id = <6>; ++ ucast_queue = <224 239>; ++ mcast_queue = <292 295>; ++ l0sp = <56 59>; ++ l0cdrr = <128 143>; ++ l0edrr = <128 143>; ++ l1cdrr = <28 31>; ++ l1edrr = <28 31>; ++ }; ++ port@7 { ++ port_id = <7>; ++ ucast_queue = <240 255>; ++ mcast_queue = <296 299>; ++ l0sp = <60 63>; ++ l0cdrr = <144 159>; ++ l0edrr = <144 159>; ++ l1cdrr = <32 35>; ++ l1edrr = <32 35>; ++ }; ++ }; ++ port_scheduler_config { ++ port@0 { ++ port_id = <0>; ++ l1scheduler { ++ group@0 { ++ sp = <0 1>; /*L0 SPs*/ ++ /*cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ /*unicast queues*/ ++ ucast_queue = <0 4 8>; ++ /*multicast queues*/ ++ mcast_queue = <256 260>; ++ /*sp cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0 0>; ++ }; ++ group@1 { ++ ucast_queue = <1 5 9>; ++ mcast_queue = <257 261>; ++ cfg = <0 1 1 1 1>; ++ }; ++ group@2 { ++ ucast_queue = <2 6 10>; ++ mcast_queue = <258 262>; ++ cfg = <0 2 2 2 2>; ++ }; ++ group@3 { ++ ucast_queue = <3 7 11>; ++ mcast_queue = <259 263>; ++ cfg = <0 3 3 3 3>; ++ }; ++ }; ++ }; ++ port@1 { ++ port_id = <1>; ++ l1scheduler { ++ group@0 { ++ sp = <36>; ++ cfg = <0 8 0 8>; ++ }; ++ group@1 { ++ sp = <37>; ++ cfg = <1 9 1 9>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <144>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <272>; ++ mcast_loop_pri = <4>; ++ cfg = <36 0 48 0 48>; ++ }; ++ }; ++ }; ++ port@2 { ++ port_id = <2>; ++ l1scheduler { ++ group@0 { ++ sp = <40>; ++ cfg = <0 12 0 12>; ++ }; ++ group@1 { ++ sp = <41>; ++ cfg = <1 13 1 13>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <160>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <276>; ++ mcast_loop_pri = <4>; ++ cfg = <40 0 64 0 64>; ++ }; ++ }; ++ }; ++ port@3 { ++ port_id = <3>; ++ l1scheduler { ++ group@0 { ++ sp = <44>; ++ cfg = <0 16 0 16>; ++ }; ++ group@1 { ++ sp = <45>; ++ cfg = <1 17 1 17>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <176>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <280>; ++ mcast_loop_pri = <4>; ++ cfg = <44 0 80 0 80>; ++ }; ++ }; ++ }; ++ port@4 { ++ port_id = <4>; ++ l1scheduler { ++ group@0 { ++ sp = <48>; ++ cfg = <0 20 0 20>; ++ }; ++ group@1 { ++ sp = <49>; ++ cfg = <1 21 1 21>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <192>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <284>; ++ mcast_loop_pri = <4>; ++ cfg = <48 0 96 0 96>; ++ }; ++ }; ++ }; ++ port@5 { ++ port_id = <5>; ++ l1scheduler { ++ group@0 { ++ sp = <52>; ++ cfg = <0 24 0 24>; ++ }; ++ group@1 { ++ sp = <53>; ++ cfg = <1 25 1 25>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <208>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <288>; ++ mcast_loop_pri = <4>; ++ cfg = <52 0 112 0 112>; ++ }; ++ }; ++ }; ++ port@6 { ++ port_id = <6>; ++ l1scheduler { ++ group@0 { ++ sp = <56>; ++ cfg = <0 28 0 28>; ++ }; ++ group@1 { ++ sp = <57>; ++ cfg = <1 29 1 29>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <224>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <292>; ++ mcast_loop_pri = <4>; ++ cfg = <56 0 128 0 128>; ++ }; ++ }; ++ }; ++ port@7 { ++ port_id = 
<7>; ++ l1scheduler { ++ group@0 { ++ sp = <60>; ++ cfg = <0 32 0 32>; ++ }; ++ group@1 { ++ sp = <61>; ++ cfg = <1 33 1 33>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <240>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <296>; ++ cfg = <60 0 144 0 144>; ++ }; ++ }; ++ }; ++ }; ++ }; ++ dp1 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <4>; ++ reg = <0x3a001600 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <3>; ++ phy-mode = "sgmii"; ++ }; ++ dp2 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <6>; ++ reg = <0x3a007000 0x3fff>; ++ qcom,mactype = <1>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <0>; ++ phy-mode = "sgmii"; ++ }; ++}; ++ ++&serial_blsp4 { ++ pinctrl-0 = <&uart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&spi_0 { /* BLSP1 QUP1 */ ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ status = "ok"; ++ ++ m25p80@0 { ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0>; ++ compatible = "n25q128a11"; ++ linux,modalias = "m25p80", "n25q128a11"; ++ spi-max-frequency = <50000000>; ++ use-default-sizes; ++ }; ++}; ++ ++&serial_blsp2 { ++ pinctrl-0 = <&hsuart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&msm_imem { ++ status = "enabled"; ++}; ++ ++&ssphy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_0 { ++ status = "ok"; ++}; ++ ++&ssphy_1 { ++ status = "ok"; ++}; ++ ++&qusb_phy_1 { ++ status = "ok"; ++}; ++ ++&usb3_0 { ++ status = "ok"; ++}; ++ ++&usb3_1 { ++ status = "ok"; ++}; ++ ++&cryptobam { ++ status = "ok"; ++}; ++ ++&crypto { ++ status = "ok"; ++}; ++ ++&i2c_0 { ++ pinctrl-0 = <&i2c_0_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&i2c_1 { ++ status = "disabled"; ++}; ++ ++&sdhc_1 { ++ qcom,clk-rates = <400000 25000000 50000000 100000000 \ ++ 192000000 384000000>; ++ qcom,bus-speed-mode = "HS400_1p8v", "HS200_1p8v", "DDR_1p8v"; ++ qcom,nonremovable; ++ status = "ok"; ++}; ++ ++&qpic_bam { ++ status = "ok"; ++}; ++ ++&nand { ++ pinctrl-0 = <&qpic_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&sdhc_2 { ++ qcom,clk-rates = <400000 25000000 50000000 100000000 \ ++ 192000000>; ++ qcom,bus-speed-mode = "HS200_1p8v", "DDR_1p8v"; ++ pinctrl-0 = <&sd_pins>; ++ pinctrl-names = "default"; ++ cd-gpios = <&tlmm 63 1>; ++ sd-ldo-gpios = <&tlmm 21 0>; ++ vqmmc-supply = <&ldo11>; ++ status = "ok"; ++}; ++ ++&qpic_lcd { ++ status = "ok"; ++}; ++ ++&qpic_lcd_panel { ++ status = "ok"; ++}; +Index: linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/drivers/net/phy/aquantia.c +=================================================================== +--- linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1.orig/drivers/net/phy/aquantia.c ++++ linux-4.4.60-qsdk-ad8f8efb2edcd35cdb130466cfc1923c37ef7ec1/drivers/net/phy/aquantia.c +@@ -32,6 +32,7 @@ + #define PHY_ID_AQR112 0x03a1b660 + #define PHY_ID_AQR113C 0x31c31C10 + #define PHY_ID_AQR112C 0x03a1b792 ++#define PHY_ID_AQR114C 0x31c31C22 + + #define AQ_PHY_MAX_VALID_MMD_REG 0xff01 + #define AQ_PHY_MAX_INVALID_MMD_REG 0xffff +@@ -756,6 +757,25 @@ static struct phy_driver aquantia_driver + .update_link = aquantia_update_link, + .driver = { .owner = THIS_MODULE,}, + }, ++{ ++ .phy_id = PHY_ID_AQR114C, ++ .phy_id_mask = 0xfffffff0, ++ .name = "Aquantia AQR114C", ++ .features = PHY_AQUANTIA_FEATURES, ++ .flags = PHY_HAS_INTERRUPT, ++ .probe = 
aquantia_phy_probe, ++ .soft_reset = aquantia_soft_reset, ++ .config_init = aquantia_config_init, ++ .aneg_done = aquantia_aneg_done, ++ .config_aneg = aquantia_config_aneg, ++ .config_intr = aquantia_config_intr, ++ .ack_interrupt = aquantia_ack_interrupt, ++ .read_status = aquantia_read_status, ++ .suspend = aquantia_suspend, ++ .resume = aquantia_resume, ++ .update_link = aquantia_update_link, ++ .driver = { .owner = THIS_MODULE,}, ++}, + }; + + module_phy_driver(aquantia_driver); +@@ -773,6 +793,7 @@ static struct mdio_device_id __maybe_unu + { PHY_ID_AQR112, 0xfffffff0 }, + { PHY_ID_AQR113C, 0xfffffff0 }, + { PHY_ID_AQR112C, 0xfffffff0 }, ++ { PHY_ID_AQR114C, 0xfffffff0 }, + { } + }; + diff --git a/target/linux/ipq807x/patches/105-fix-dtc-gcc10-build.patch b/target/linux/ipq807x/patches/105-fix-dtc-gcc10-build.patch new file mode 100644 index 0000000000..f91601ec59 --- /dev/null +++ b/target/linux/ipq807x/patches/105-fix-dtc-gcc10-build.patch @@ -0,0 +1,11 @@ +--- a/scripts/dtc/dtc-lexer.lex.c_shipped ++++ b/scripts/dtc/dtc-lexer.lex.c_shipped +@@ -637,7 +637,7 @@ char *yytext; + #include "srcpos.h" + #include "dtc-parser.tab.h" + +-YYLTYPE yylloc; ++extern YYLTYPE yylloc; + extern bool treesource_error; + + /* CAUTION: this will stop working if we ever use yyless() or yyunput() */ diff --git a/target/linux/ipq807x/patches/106-eap101.patch b/target/linux/ipq807x/patches/106-eap101.patch new file mode 100644 index 0000000000..188870b8dc --- /dev/null +++ b/target/linux/ipq807x/patches/106-eap101.patch @@ -0,0 +1,993 @@ +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq6018-cp01-edgecore.dtsi +=================================================================== +--- /dev/null ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq6018-cp01-edgecore.dtsi +@@ -0,0 +1,542 @@ ++/* ++ * Copyright (c) 2019, The Linux Foundation. All rights reserved. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
++ */ ++ ++#include "qcom-ipq6018.dtsi" ++#include ++#include ++ ++/ { ++ #address-cells = <0x2>; ++ #size-cells = <0x2>; ++ compatible = "qcom,ipq6018-cp01", "qcom,ipq6018"; ++ interrupt-parent = <&intc>; ++ qcom,msm-id = <0x192 0x0>, <0x193 0x0>; ++ ++ aliases { ++ serial0 = &blsp1_uart3; ++ serial1 = &blsp1_uart2; ++ sdhc2 = &sdhc_2; ++ /* ++ * Aliases as required by u-boot ++ * to patch MAC addresses ++ */ ++ ethernet0 = "/soc/dp1"; ++ ethernet1 = "/soc/dp2"; ++ ethernet2 = "/soc/dp3"; ++ ++ led-boot = &led_power; ++ led-failsafe = &led_power; ++ led-running = &led_power; ++ led-upgrade = &led_power; ++ }; ++ ++ chosen { ++ bootargs = "console=ttyMSM0,115200,n8 rw init=/init"; ++#ifdef __IPQ_MEM_PROFILE_256_MB__ ++ bootargs-append = " swiotlb=1"; ++#else ++ bootargs-append = " swiotlb=1 coherent_pool=2M"; ++#endif ++ }; ++ ++}; ++ ++&tlmm { ++ pinctrl-0 = <&sd_ldo_pins>; ++ pinctrl-names = "default"; ++ ++ uart_pins: uart_pins { ++ mux { ++ pins = "gpio44", "gpio45"; ++ function = "blsp2_uart"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ sd_ldo_pins: sd_ldo_pins { ++ mux { ++ pins = "gpio66"; ++ function = "gpio"; ++ drive-strength = <2>; ++ bias-disable; ++ output-low; ++ }; ++ }; ++ ++ spi_0_pins: spi_0_pins { ++ mux { ++ pins = "gpio38", "gpio39", "gpio40", "gpio41"; ++ function = "blsp0_spi"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ spi_1_pins: spi_1_pins { ++ mux { ++ pins = "gpio69", "gpio71", "gpio72"; ++ function = "blsp1_spi"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ spi_cs { ++ pins = "gpio70"; ++ function = "blsp1_spi"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ quartz_interrupt { ++ pins = "gpio78"; ++ function = "gpio"; ++ input; ++ bias-disable; ++ }; ++ quartz_reset { ++ pins = "gpio79"; ++ function = "gpio"; ++ output-low; ++ bias-disable; ++ }; ++ ++ }; ++ ++ qpic_pins: qpic_pins { ++ data_0 { ++ pins = "gpio15"; ++ function = "qpic_pad0"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_1 { ++ pins = "gpio12"; ++ function = "qpic_pad1"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_2 { ++ pins = "gpio13"; ++ function = "qpic_pad2"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_3 { ++ pins = "gpio14"; ++ function = "qpic_pad3"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_4 { ++ pins = "gpio5"; ++ function = "qpic_pad4"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_5 { ++ pins = "gpio6"; ++ function = "qpic_pad5"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_6 { ++ pins = "gpio7"; ++ function = "qpic_pad6"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_7 { ++ pins = "gpio8"; ++ function = "qpic_pad7"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ qpic_pad { ++ pins = "gpio1", "gpio3", "gpio4", ++ "gpio10", "gpio11", "gpio17"; ++ function = "qpic_pad"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ sd_pins: sd_pins { ++ mux { ++ pins = "gpio62"; ++ function = "sd_card"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ extcon_usb_pins: extcon_usb_pins { ++ mux { ++ pins = "gpio26"; ++ function = "gpio"; ++ drive-strength = <2>; ++ bias-pull-down; ++ }; ++ }; ++ ++ button_pins: button_pins { ++ wps_button { ++ pins = "gpio9"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ reset_button { ++ pins = "gpio19"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ mdio_pins: mdio_pinmux { ++ mux_0 { ++ pins = "gpio64"; ++ function = "mdc"; ++ 
drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_1 { ++ pins = "gpio65"; ++ function = "mdio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_2 { ++ pins = "gpio75"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ mux_3 { ++ pins = "gpio77"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ }; ++ ++ leds_pins: leds_pins { ++ led_pwr { ++ pins = "gpio74"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_5g { ++ pins = "gpio35"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_2g { ++ pins = "gpio37"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_wan { ++ pins = "gpio62"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_lan1 { ++ pins = "gpio61"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_lan2 { ++ pins = "gpio63"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_bt { ++ pins = "gpio73"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ hsuart_pins: hsuart_pins { ++ mux { ++ pins = "gpio71", "gpio72", "gpio69", "gpio70"; ++ function = "blsp1_uart"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ btcoex_pins: btcoex_pins { ++ mux_0 { ++ pins = "gpio51"; ++ function = "pta1_1"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ mux_1 { ++ pins = "gpio53"; ++ function = "pta1_0"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ mux_2 { ++ pins = "gpio52"; ++ function = "pta1_2"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ }; ++}; ++ ++&soc { ++ extcon_usb: extcon_usb { ++ pinctrl-0 = <&extcon_usb_pins>; ++ pinctrl-names = "default"; ++ id-gpio = <&tlmm 26 GPIO_ACTIVE_LOW>; ++ status = "ok"; ++ }; ++ ++ mdio: mdio@90000 { ++ pinctrl-0 = <&mdio_pins>; ++ pinctrl-names = "default"; ++ phy-reset-gpio = <&tlmm 75 0 &tlmm 77 1>; ++ status = "ok"; ++ phy0: ethernet-phy@0 { ++ reg = <0x1c>; ++ }; ++ phy1: ethernet-phy@1 { ++ reg = <3>; ++ }; ++ phy2: ethernet-phy@2 { ++ reg = <4>; ++ }; ++ }; ++ ++ dp1 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <5>; ++ reg = <0x3a001000 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <28>; ++ phy-mode = "sgmii"; ++ }; ++ ++ dp2 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <4>; ++ reg = <0x3a001200 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <3>; ++ phy-mode = "sgmii"; ++ }; ++ ++ dp3 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <3>; ++ reg = <0x3a001400 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <4>; ++ phy-mode = "sgmii"; ++ }; ++ ++ ++ nss-macsec0 { ++ compatible = "qcom,nss-macsec"; ++ phy_addr = <0x1c>; ++ phy_access_mode = <0>; ++ mdiobus = <&mdio>; ++ }; ++ ++ ess-switch@3a000000 { ++ switch_cpu_bmp = <0x1>; /* cpu port bitmap */ ++ switch_lan_bmp = <0x1e>; /* lan port bitmap */ ++ switch_wan_bmp = <0x20>; /* wan port bitmap */ ++ switch_inner_bmp = <0xc0>; /*inner port bitmap*/ ++ switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ ++ switch_mac_mode1 = <0xf>; /* mac mode for uniphy instance1*/ ++ switch_mac_mode2 = <0xff>; /* mac mode for uniphy instance2*/ ++ qcom,port_phyinfo { ++ port@2 { ++ port_id = <3>; ++ phy_address = <4>; ++ }; ++ port@1 { ++ port_id = <4>; ++ phy_address = <3>; ++ }; 
++ port@0 { ++ port_id = <5>; ++ phy_address = <0x1c>; ++ port_mac_sel = "QGMAC_PORT"; ++ }; ++ }; ++ }; ++ ++ gpio_keys { ++ compatible = "gpio-keys"; ++ pinctrl-0 = <&button_pins>; ++ pinctrl-names = "default"; ++ ++ /*wps { ++ label = "wps"; ++ linux,code = ; ++ gpios = <&tlmm 9 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ debounce-interval = <60>; ++ };*/ ++ reset { ++ label = "reset"; ++ linux,code = ; ++ gpios = <&tlmm 19 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ debounce-interval = <60>; ++ }; ++ }; ++ ++ leds { ++ compatible = "gpio-leds"; ++ pinctrl-0 = <&leds_pins>; ++ pinctrl-names = "default"; ++ ++ led_power: led@74 { ++ label = "green:power"; ++ gpios = <&tlmm 74 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "led_pwr"; ++ default-state = "on"; ++ }; ++ led@35 { ++ label = "green:wifi5"; ++ gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "led_5g"; ++ default-state = "off"; ++ }; ++ led@37 { ++ label = "green:wifi2"; ++ gpios = <&tlmm 37 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "led_2g"; ++ default-state = "off"; ++ }; ++ }; ++}; ++ ++&blsp1_uart3 { ++ pinctrl-0 = <&uart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&spi_0 { ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ status = "ok"; ++ ++ m25p80@0 { ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0>; ++ compatible = "n25q128a11"; ++ linux,modalias = "m25p80", "n25q128a11"; ++ spi-max-frequency = <50000000>; ++ use-default-sizes; ++ }; ++}; ++ ++&blsp1_uart2 { ++ pinctrl-0 = <&hsuart_pins &btcoex_pins>; ++ pinctrl-names = "default"; ++ dmas = <&blsp_dma 2>, ++ <&blsp_dma 3>; ++ dma-names = "tx", "rx"; ++ status = "ok"; ++}; ++ ++&spi_1 { /* BLSP1 QUP1 */ ++ pinctrl-0 = <&spi_1_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ quartz-reset-gpio = <&tlmm 79 1>; ++ status = "disabled"; ++ spidev1: spi@1 { ++ compatible = "qca,spidev"; ++ reg = <0>; ++ spi-max-frequency = <24000000>; ++ }; ++}; ++ ++&qpic_bam { ++ status = "ok"; ++}; ++ ++&nand { ++ pinctrl-0 = <&qpic_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&ssphy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_1 { ++ status = "ok"; ++}; ++ ++&usb2 { ++ status = "ok"; ++}; ++ ++&usb3 { ++ status = "ok"; ++}; ++ ++&nss_crypto { ++ status = "ok"; ++}; ++ ++&pcie_phy { ++ status = "ok"; ++}; ++ ++&pcie0 { ++ status = "ok"; ++}; ++ ++&qpic_lcd { ++ status = "ok"; ++}; ++ ++&qpic_lcd_panel { ++ status = "ok"; ++}; +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq6018-edgecore-eap101.dts +=================================================================== +--- /dev/null ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq6018-edgecore-eap101.dts +@@ -0,0 +1,441 @@ ++/dts-v1/; ++/* ++ * Copyright (c) 2019, The Linux Foundation. All rights reserved. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ++ */ ++ ++#include "qcom-ipq6018.dtsi" ++#include "qcom-ipq6018-rpm-regulator.dtsi" ++#include "qcom-ipq6018-cpr-regulator.dtsi" ++#include "qcom-ipq6018-cp-cpu.dtsi" ++#include ++#include ++ ++/ { ++ #address-cells = <0x2>; ++ #size-cells = <0x2>; ++ model = "EdgeCore EAP101"; ++ compatible = "edgecore,eap101", "qcom,ipq6018-cp01", "qcom,ipq6018"; ++ interrupt-parent = <&intc>; ++ ++ aliases { ++ serial0 = &blsp1_uart3; ++ serial1 = &blsp1_uart2; ++ ++ /* ++ * Aliases as required by u-boot ++ * to patch MAC addresses ++ */ ++ ethernet0 = "/soc/dp1"; ++ ethernet1 = "/soc/dp2"; ++ ++ led-boot = &led_power; ++ led-failsafe = &led_power; ++ led-running = &led_power; ++ led-upgrade = &led_power; ++ }; ++ ++ chosen { ++ bootargs = "console=ttyMSM0,115200,n8 rw init=/init"; ++ bootargs-append = " console=ttyMSM0,115200,n8 swiotlb=1 coherent_pool=2M"; ++ }; ++ ++ /* ++ * +=========+==============+========================+ ++ * | | | | ++ * | Region | Start Offset | Size | ++ * | | | | ++ * +--------+--------------+-------------------------+ ++ * | | | | ++ * | | | | ++ * | | | | ++ * | | | | ++ * | Linux | 0x41000000 | 139MB | ++ * | | | | ++ * | | | | ++ * | | | | ++ * +--------+--------------+-------------------------+ ++ * | TZ App | 0x49B00000 | 6MB | ++ * +--------+--------------+-------------------------+ ++ * ++ * From the available 145 MB for Linux in the first 256 MB, ++ * we are reserving 6 MB for TZAPP. ++ * ++ * Refer arch/arm64/boot/dts/qcom/qcom-ipq6018-memory.dtsi ++ * for memory layout. 
++ */ ++ ++/* TZAPP is enabled only in default memory profile */ ++#if !defined(__IPQ_MEM_PROFILE_256_MB__) && !defined(__IPQ_MEM_PROFILE_512_MB__) ++ reserved-memory { ++ tzapp:tzapp@49B00000 { /* TZAPPS */ ++ no-map; ++ reg = <0x0 0x49B00000 0x0 0x00600000>; ++ }; ++ }; ++#endif ++}; ++ ++&tlmm { ++ uart_pins: uart_pins { ++ mux { ++ pins = "gpio44", "gpio45"; ++ function = "blsp2_uart"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ spi_0_pins: spi_0_pins { ++ mux { ++ pins = "gpio38", "gpio39", "gpio40", "gpio41"; ++ function = "blsp0_spi"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ qpic_pins: qpic_pins { ++ data_0 { ++ pins = "gpio15"; ++ function = "qpic_pad0"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_1 { ++ pins = "gpio12"; ++ function = "qpic_pad1"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_2 { ++ pins = "gpio13"; ++ function = "qpic_pad2"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_3 { ++ pins = "gpio14"; ++ function = "qpic_pad3"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_4 { ++ pins = "gpio5"; ++ function = "qpic_pad4"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_5 { ++ pins = "gpio6"; ++ function = "qpic_pad5"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_6 { ++ pins = "gpio7"; ++ function = "qpic_pad6"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ data_7 { ++ pins = "gpio8"; ++ function = "qpic_pad7"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ qpic_pad { ++ pins = "gpio1", "gpio3", "gpio4", ++ "gpio10", "gpio11", "gpio17"; ++ function = "qpic_pad"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ extcon_usb_pins: extcon_usb_pins { ++ mux { ++ pins = "gpio26"; ++ function = "gpio"; ++ drive-strength = <2>; ++ bias-pull-down; ++ }; ++ }; ++ ++ button_pins: button_pins { ++ wps_button { ++ pins = "gpio19"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ mdio_pins: mdio_pinmux { ++ mux_0 { ++ pins = "gpio64"; ++ function = "mdc"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_1 { ++ pins = "gpio65"; ++ function = "mdio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_2 { ++ pins = "gpio75"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ }; ++ ++ leds_pins: leds_pins { ++ led_pwr { ++ pins = "gpio74"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_5g { ++ pins = "gpio35"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led_2g { ++ pins = "gpio37"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ uart2_pins: uart2_pins { ++ mux { ++ pins = "gpio57", "gpio58"; ++ function = "blsp4_uart"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++}; ++ ++&soc { ++ extcon_usb: extcon_usb { ++ pinctrl-0 = <&extcon_usb_pins>; ++ pinctrl-names = "default"; ++ id-gpio = <&tlmm 26 GPIO_ACTIVE_LOW>; ++ status = "ok"; ++ }; ++ ++ mdio: mdio@90000 { ++ pinctrl-0 = <&mdio_pins>; ++ pinctrl-names = "default"; ++ phy-reset-gpio = <&tlmm 75 0 &tlmm 77 1>; ++ status = "ok"; ++ phy0: ethernet-phy@0 { ++ reg = <0x1c>; ++ }; ++ phy1: ethernet-phy@1 { ++ reg = <3>; ++ }; ++ phy2: ethernet-phy@2 { ++ reg = <4>; ++ }; ++ }; ++ ++ dp1 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <5>; ++ reg = <0x3a001000 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <28>; ++ phy-mode = "sgmii"; ++ }; ++ ++ dp2 { ++ device_type = 
"network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <4>; ++ reg = <0x3a001200 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <3>; ++ phy-mode = "sgmii"; ++ }; ++ ++ dp3 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <3>; ++ reg = <0x3a001400 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <4>; ++ phy-mode = "sgmii"; ++ }; ++ ++ ess-switch@3a000000 { ++ switch_cpu_bmp = <0x1>; /* cpu port bitmap */ ++ switch_lan_bmp = <0x1e>; /* lan port bitmap */ ++ switch_wan_bmp = <0x20>; /* wan port bitmap */ ++ switch_inner_bmp = <0xc0>; /*inner port bitmap*/ ++ switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ ++ switch_mac_mode1 = <0xf>; /* mac mode for uniphy instance1*/ ++ switch_mac_mode2 = <0xff>; /* mac mode for uniphy instance2*/ ++ qcom,port_phyinfo { ++ port@2 { ++ port_id = <3>; ++ phy_address = <4>; ++ }; ++ port@1 { ++ port_id = <4>; ++ phy_address = <3>; ++ }; ++ port@0 { ++ port_id = <5>; ++ phy_address = <0x1c>; ++ port_mac_sel = "QGMAC_PORT"; ++ }; ++ }; ++ }; ++ ++ gpio_keys { ++ compatible = "gpio-keys"; ++ pinctrl-0 = <&button_pins>; ++ pinctrl-names = "default"; ++ ++ wps { ++ label = "reset"; ++ linux,code = ; ++ gpios = <&tlmm 19 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ debounce-interval = <60>; ++ }; ++ }; ++ ++ leds { ++ compatible = "gpio-leds"; ++ pinctrl-0 = <&leds_pins>; ++ pinctrl-names = "default"; ++ ++ led@25 { ++ label = "green:wifi5"; ++ gpios = <&tlmm 35 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:5g"; ++ default-state = "off"; ++ }; ++ led@24 { ++ label = "green:wifi2"; ++ gpios = <&tlmm 37 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "wf188:green:2g"; ++ default-state = "off"; ++ }; ++ led_power: led@16 { ++ label = "green:led_pwr"; ++ gpios = <&tlmm 74 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "green:power"; ++ default-state = "off"; ++ }; ++ led@61 { ++ label = "green:lan1"; ++ gpios = <&tlmm 61 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "green:power"; ++ default-state = "off"; ++ }; ++ led@62 { ++ label = "green:wan"; ++ gpios = <&tlmm 62 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "green:power"; ++ default-state = "off"; ++ }; ++ led@63 { ++ label = "green:lan2"; ++ gpios = <&tlmm 63 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "green:power"; ++ default-state = "off"; ++ }; ++ }; ++}; ++ ++&blsp1_uart3 { ++ pinctrl-0 = <&uart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&spi_0 { ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ status = "ok"; ++ ++ m25p80@0 { ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0>; ++ compatible = "n25q128a11"; ++ linux,modalias = "m25p80", "n25q128a11"; ++ spi-max-frequency = <50000000>; ++ use-default-sizes; ++ }; ++}; ++ ++&blsp1_uart2 { ++ pinctrl-0 = <&uart2_pins>; ++ pinctrl-names = "default"; ++ dmas = <&blsp_dma 2>, ++ <&blsp_dma 3>; ++ dma-names = "tx", "rx"; ++ status = "ok"; ++}; ++&qpic_bam { ++ status = "ok"; ++}; ++ ++&nand { ++ pinctrl-0 = <&qpic_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&ssphy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_1 { ++ status = "ok"; ++}; ++ ++&usb2 { ++ status = "ok"; ++}; ++ ++&usb3 { ++ status = "ok"; ++}; ++ ++&nss_crypto { ++ status = "ok"; ++}; diff --git a/target/linux/ipq807x/patches/108-log-spam.patch 
b/target/linux/ipq807x/patches/108-log-spam.patch new file mode 100644 index 0000000000..94b2a3ffd5 --- /dev/null +++ b/target/linux/ipq807x/patches/108-log-spam.patch @@ -0,0 +1,37 @@ +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/clk/qcom/clk-branch.c +=================================================================== +--- linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce.orig/drivers/clk/qcom/clk-branch.c ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/clk/qcom/clk-branch.c +@@ -75,7 +75,7 @@ static int clk_branch_wait(const struct + bool (check_halt)(const struct clk_branch *, bool)) + { + bool voted = br->halt_check & BRANCH_VOTED; +- const char *name = clk_hw_get_name(&br->clkr.hw); ++ //const char *name = clk_hw_get_name(&br->clkr.hw); + + /* Skip checking halt bit if the clock is in hardware gated mode */ + if (clk_branch_in_hwcg_mode(br)) +@@ -93,8 +93,8 @@ static int clk_branch_wait(const struct + return 0; + udelay(1); + } +- WARN(1, "%s status stuck at 'o%s'", name, +- enabling ? "ff" : "n"); ++/* WARN(1, "%s status stuck at 'o%s'", name, ++ enabling ? "ff" : "n");*/ + return -EBUSY; + } + return 0; +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/usb/phy/phy-msm-qusb.c +=================================================================== +--- linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce.orig/drivers/usb/phy/phy-msm-qusb.c ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/usb/phy/phy-msm-qusb.c +@@ -491,7 +491,7 @@ static int qusb_phy_init(struct usb_phy + dev_err(phy->dev, "QUSB PHY PLL LOCK fails:%x\n", + readb_relaxed(qphy->base + + QUSB2PHY_PLL_STATUS)); +- WARN_ON(1); ++ //WARN_ON(1); + } + + /* Set OTG VBUS Valid from HSPHY to controller */ diff --git a/target/linux/ipq807x/patches/109-tplink.patch b/target/linux/ipq807x/patches/109-tplink.patch new file mode 100644 index 0000000000..3ad3ffd99c --- /dev/null +++ b/target/linux/ipq807x/patches/109-tplink.patch @@ -0,0 +1,1518 @@ +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq807x-ex227.dts +=================================================================== +--- /dev/null ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq807x-ex227.dts +@@ -0,0 +1,754 @@ ++/dts-v1/; ++/* ++ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
++ */ ++#include "qcom-ipq807x-soc.dtsi" ++#include "qcom-ipq807x-hk-cpu.dtsi" ++ ++/ { ++ #address-cells = <0x2>; ++ #size-cells = <0x2>; ++ model = "TP-Link EX227"; ++ compatible = "tplink,ex227", "qcom,ipq807x"; ++ qcom,msm-id = <0x143 0x0>; ++ interrupt-parent = <&intc>; ++ qcom,board-id = <0x8 0x0>; ++ qcom,pmic-id = <0x0 0x0 0x0 0x0>; ++ ++ aliases { ++ /* ++ * Aliases as required by u-boot ++ * to patch MAC addresses ++ */ ++ ethernet0 = "/soc/dp1"; ++ led-boot = &led_power; ++ led-failsafe = &led_power; ++ led-running = &led_power; ++ led-upgrade = &led_power; ++ }; ++ ++ chosen { ++ bootargs = "console=ttyMSM0,115200,n8 root=/dev/ram0 rw \ ++ init=/init"; ++ bootargs-append = " swiotlb=1"; ++ }; ++}; ++ ++&tlmm { ++ pinctrl-0 = <&btcoex_pins>; ++ pinctrl-names = "default"; ++ ++ btcoex_pins: btcoex_pins { ++ mux_0 { ++ pins = "gpio64"; ++ function = "pta1_1"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ mux_1 { ++ pins = "gpio65"; ++ function = "pta1_2"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ mux_2 { ++ pins = "gpio66"; ++ function = "pta1_0"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ }; ++ ++ mdio_pins: mdio_pinmux { ++ mux_0 { ++ pins = "gpio68"; ++ function = "mdc"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_1 { ++ pins = "gpio69"; ++ function = "mdio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_2 { ++ pins = "gpio25"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ mux_3 { ++ pins = "gpio37"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ }; ++ ++ uart_pins: uart_pins { ++ mux { ++ pins = "gpio23", "gpio24"; ++ function = "blsp4_uart1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ spi_0_pins: spi_0_pins { ++ mux { ++ pins = "gpio38", "gpio39", "gpio40", "gpio41"; ++ function = "blsp0_spi"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ spi_3_pins: spi_3_pins { ++ mux { ++ pins = "gpio52", "gpio53"; ++ function = "blsp3_spi"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ spi_cs { ++ pins = "gpio22"; ++ function = "blsp3_spi2"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ quartz_interrupt { ++ pins = "gpio47"; ++ function = "gpio"; ++ input; ++ bias-disable; ++ }; ++ quartz_reset { ++ pins = "gpio21"; ++ function = "gpio"; ++ output-low; ++ bias-disable; ++ }; ++ }; ++ ++ qpic_pins: qpic_pins { ++ data_0 { ++ pins = "gpio15"; ++ function = "qpic_pad0"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_1 { ++ pins = "gpio12"; ++ function = "qpic_pad1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_2 { ++ pins = "gpio13"; ++ function = "qpic_pad2"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_3 { ++ pins = "gpio14"; ++ function = "qpic_pad3"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_4 { ++ pins = "gpio5"; ++ function = "qpic_pad4"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_5 { ++ pins = "gpio6"; ++ function = "qpic_pad5"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_6 { ++ pins = "gpio7"; ++ function = "qpic_pad6"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_7 { ++ pins = "gpio8"; ++ function = "qpic_pad7"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ qpic_pad { ++ pins = "gpio1", "gpio3", "gpio4", ++ "gpio10", "gpio11", "gpio17"; ++ function = "qpic_pad"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ hsuart_pins: hsuart_pins { ++ mux { ++ pins = "gpio46", "gpio47", "gpio48", "gpio49"; ++ function = "blsp2_uart"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ /* POWER_LED, 
TP-Link */ ++ led_pins: led_pins { ++ led_power { ++ pins = "gpio42"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ /* BUTTON, TP-Link */ ++ button_pins: button_pins { ++ reset_button { ++ pins = "gpio50"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ usb_mux_sel_pins: usb_mux_pins { ++ mux { ++ pins = "gpio27"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ pcie0_pins: pcie_pins { ++ pcie0_rst { ++ pins = "gpio58"; ++ function = "pcie0_rst"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ pcie0_wake { ++ pins = "gpio59"; ++ function = "pcie0_wake"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++}; ++ ++&soc { ++ mdio: mdio@90000 { ++ pinctrl-0 = <&mdio_pins>; ++ pinctrl-names = "default"; ++ phy-reset-gpio = <&tlmm 37 0 &tlmm 25 1>; ++ compatible = "qcom,ipq40xx-mdio", "qcom,qca-mdio"; ++ phy0: ethernet-phy@0 { ++ reg = <0>; ++ }; ++ phy1: ethernet-phy@1 { ++ reg = <1>; ++ }; ++ phy2: ethernet-phy@2 { ++ reg = <2>; ++ }; ++ phy3: ethernet-phy@3 { ++ reg = <3>; ++ }; ++ phy4: ethernet-phy@4 { ++ reg = <28>; ++ }; ++ phy5: ethernet-phy@5 { ++ reg = <4>; ++ }; ++ }; ++ ++ ess-switch@3a000000 { ++ switch_cpu_bmp = <0x1>; /* cpu port bitmap */ ++ switch_lan_bmp = <0x3e>; /* lan port bitmap */ ++ switch_wan_bmp = <0x40>; /* wan port bitmap */ ++ switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ ++ switch_mac_mode1 = <0xf>; /* mac mode for uniphy instance1*/ ++ switch_mac_mode2 = <0xf>; /* mac mode for uniphy instance2*/ ++ bm_tick_mode = <0>; /* bm tick mode */ ++ tm_tick_mode = <0>; /* tm tick mode */ ++ qcom,port_phyinfo { ++ port@0 { ++ port_id = <1>; ++ phy_address = <0>; ++ }; ++ port@1 { ++ port_id = <2>; ++ phy_address = <1>; ++ }; ++ port@2 { ++ port_id = <3>; ++ phy_address = <2>; ++ }; ++ port@3 { ++ port_id = <4>; ++ phy_address = <3>; ++ }; ++ port@4 { ++ port_id = <5>; ++ phy_address = <28>; ++ port_mac_sel = "QGMAC_PORT"; ++ }; ++ port@5 { ++ port_id = <6>; ++ phy_address = <4>; ++ }; ++ }; ++ port_scheduler_resource { ++ port@0 { ++ port_id = <0>; ++ ucast_queue = <0 143>; ++ mcast_queue = <256 271>; ++ l0sp = <0 35>; ++ l0cdrr = <0 47>; ++ l0edrr = <0 47>; ++ l1cdrr = <0 7>; ++ l1edrr = <0 7>; ++ }; ++ port@1 { ++ port_id = <1>; ++ ucast_queue = <144 159>; ++ mcast_queue = <272 275>; ++ l0sp = <36 39>; ++ l0cdrr = <48 63>; ++ l0edrr = <48 63>; ++ l1cdrr = <8 11>; ++ l1edrr = <8 11>; ++ }; ++ port@2 { ++ port_id = <2>; ++ ucast_queue = <160 175>; ++ mcast_queue = <276 279>; ++ l0sp = <40 43>; ++ l0cdrr = <64 79>; ++ l0edrr = <64 79>; ++ l1cdrr = <12 15>; ++ l1edrr = <12 15>; ++ }; ++ port@3 { ++ port_id = <3>; ++ ucast_queue = <176 191>; ++ mcast_queue = <280 283>; ++ l0sp = <44 47>; ++ l0cdrr = <80 95>; ++ l0edrr = <80 95>; ++ l1cdrr = <16 19>; ++ l1edrr = <16 19>; ++ }; ++ port@4 { ++ port_id = <4>; ++ ucast_queue = <192 207>; ++ mcast_queue = <284 287>; ++ l0sp = <48 51>; ++ l0cdrr = <96 111>; ++ l0edrr = <96 111>; ++ l1cdrr = <20 23>; ++ l1edrr = <20 23>; ++ }; ++ port@5 { ++ port_id = <5>; ++ ucast_queue = <208 223>; ++ mcast_queue = <288 291>; ++ l0sp = <52 55>; ++ l0cdrr = <112 127>; ++ l0edrr = <112 127>; ++ l1cdrr = <24 27>; ++ l1edrr = <24 27>; ++ }; ++ port@6 { ++ port_id = <6>; ++ ucast_queue = <224 239>; ++ mcast_queue = <292 295>; ++ l0sp = <56 59>; ++ l0cdrr = <128 143>; ++ l0edrr = <128 143>; ++ l1cdrr = <28 31>; ++ l1edrr = <28 31>; ++ }; ++ port@7 { ++ port_id = <7>; ++ ucast_queue = <240 255>; ++ mcast_queue = <296 299>; ++ 
l0sp = <60 63>; ++ l0cdrr = <144 159>; ++ l0edrr = <144 159>; ++ l1cdrr = <32 35>; ++ l1edrr = <32 35>; ++ }; ++ }; ++ port_scheduler_config { ++ port@0 { ++ port_id = <0>; ++ l1scheduler { ++ group@0 { ++ sp = <0 1>; /*L0 SPs*/ ++ /*cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ /*unicast queues*/ ++ ucast_queue = <0 4 8>; ++ /*multicast queues*/ ++ mcast_queue = <256 260>; ++ /*sp cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0 0>; ++ }; ++ group@1 { ++ ucast_queue = <1 5 9>; ++ mcast_queue = <257 261>; ++ cfg = <0 1 1 1 1>; ++ }; ++ group@2 { ++ ucast_queue = <2 6 10>; ++ mcast_queue = <258 262>; ++ cfg = <0 2 2 2 2>; ++ }; ++ group@3 { ++ ucast_queue = <3 7 11>; ++ mcast_queue = <259 263>; ++ cfg = <0 3 3 3 3>; ++ }; ++ }; ++ }; ++ port@1 { ++ port_id = <1>; ++ l1scheduler { ++ group@0 { ++ sp = <36>; ++ cfg = <0 8 0 8>; ++ }; ++ group@1 { ++ sp = <37>; ++ cfg = <1 9 1 9>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <144>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <272>; ++ mcast_loop_pri = <4>; ++ cfg = <36 0 48 0 48>; ++ }; ++ }; ++ }; ++ port@2 { ++ port_id = <2>; ++ l1scheduler { ++ group@0 { ++ sp = <40>; ++ cfg = <0 12 0 12>; ++ }; ++ group@1 { ++ sp = <41>; ++ cfg = <1 13 1 13>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <160>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <276>; ++ mcast_loop_pri = <4>; ++ cfg = <40 0 64 0 64>; ++ }; ++ }; ++ }; ++ port@3 { ++ port_id = <3>; ++ l1scheduler { ++ group@0 { ++ sp = <44>; ++ cfg = <0 16 0 16>; ++ }; ++ group@1 { ++ sp = <45>; ++ cfg = <1 17 1 17>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <176>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <280>; ++ mcast_loop_pri = <4>; ++ cfg = <44 0 80 0 80>; ++ }; ++ }; ++ }; ++ port@4 { ++ port_id = <4>; ++ l1scheduler { ++ group@0 { ++ sp = <48>; ++ cfg = <0 20 0 20>; ++ }; ++ group@1 { ++ sp = <49>; ++ cfg = <1 21 1 21>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <192>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <284>; ++ mcast_loop_pri = <4>; ++ cfg = <48 0 96 0 96>; ++ }; ++ }; ++ }; ++ port@5 { ++ port_id = <5>; ++ l1scheduler { ++ group@0 { ++ sp = <52>; ++ cfg = <0 24 0 24>; ++ }; ++ group@1 { ++ sp = <53>; ++ cfg = <1 25 1 25>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <208>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <288>; ++ mcast_loop_pri = <4>; ++ cfg = <52 0 112 0 112>; ++ }; ++ }; ++ }; ++ port@6 { ++ port_id = <6>; ++ l1scheduler { ++ group@0 { ++ sp = <56>; ++ cfg = <0 28 0 28>; ++ }; ++ group@1 { ++ sp = <57>; ++ cfg = <1 29 1 29>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <224>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <292>; ++ mcast_loop_pri = <4>; ++ cfg = <56 0 128 0 128>; ++ }; ++ }; ++ }; ++ port@7 { ++ port_id = <7>; ++ l1scheduler { ++ group@0 { ++ sp = <60>; ++ cfg = <0 32 0 32>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <240>; ++ mcast_queue = <296>; ++ cfg = <60 0 144 0 144>; ++ }; ++ }; ++ }; ++ }; ++ }; ++ ++ dp1 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <6>; ++ reg = <0x3a001000 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <4>; ++ phy-mode = "sgmii"; ++ }; ++ ++ /* POWER LED, TP-Link */ ++ leds { ++ compatible = "gpio-leds"; ++ pinctrl-0 = <&led_pins>; ++ pinctrl-names = "default"; ++ ++ led_power: led_power { ++ label = "blue:power"; ++ gpio = <&tlmm 42 GPIO_ACTIVE_HIGH>; ++ default-state = "on"; ++ }; ++ }; ++ ++ 
/* BUTTON, TP-Link */ ++ gpio_keys { ++ compatible = "gpio-keys"; ++ pinctrl-0 = <&button_pins>; ++ pinctrl-names = "default"; ++ ++ button@1 { ++ label = "reset"; ++ linux,code = ; ++ gpios = <&tlmm 50 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ }; ++ }; ++ ++ nss-macsec0 { ++ compatible = "qcom,nss-macsec"; ++ phy_addr = <0x18>; ++ phy_access_mode = <0>; ++ mdiobus = <&mdio>; ++ }; ++ nss-macsec1 { ++ compatible = "qcom,nss-macsec"; ++ phy_addr = <0x1c>; ++ phy_access_mode = <0>; ++ mdiobus = <&mdio>; ++ }; ++}; ++ ++&serial_blsp4 { ++ pinctrl-0 = <&uart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&spi_0 { /* BLSP1 QUP1 */ ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ status = "ok"; ++ ++ m25p80@0 { ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0>; ++ compatible = "n25q128a11"; ++ linux,modalias = "m25p80", "n25q128a11"; ++ spi-max-frequency = <50000000>; ++ use-default-sizes; ++ }; ++}; ++ ++&spi_4 { /* BLSP1 QUP3 */ ++ pinctrl-0 = <&spi_3_pins>; ++ pinctrl-names = "default"; ++ cs-select = <2>; ++ quartz-reset-gpio = <&tlmm 21 1>; ++ status = "disabled"; ++ spidev3: spi@3 { ++ compatible = "qca,spidev"; ++ reg = <0>; ++ spi-max-frequency = <24000000>; ++ }; ++}; ++ ++&serial_blsp2 { ++ status = "disabled"; ++}; ++ ++&msm_imem { ++ status = "disabled"; ++}; ++ ++&ssphy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_0 { ++ status = "ok"; ++}; ++ ++&ssphy_1 { ++ status = "ok"; ++}; ++ ++&qusb_phy_1 { ++ status = "ok"; ++}; ++ ++&usb3_0 { ++ status = "ok"; ++}; ++ ++&usb3_1 { ++ status = "ok"; ++}; ++ ++&cryptobam { ++ status = "ok"; ++}; ++ ++&crypto { ++ status = "ok"; ++}; ++ ++&i2c_0 { ++ status = "disabled"; ++}; ++ ++&i2c_1 { ++ status = "disabled"; ++}; ++ ++&qpic_bam { ++ status = "ok"; ++}; ++ ++&nand { ++ pinctrl-0 = <&qpic_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&qpic_lcd { ++ status = "disabled"; ++}; ++ ++&qpic_lcd_panel { ++ status = "disabled"; ++}; ++ ++&ledc { ++ status = "disabled"; ++}; ++ ++&pcie0 { ++ status = "ok"; ++}; ++ ++&pcie1 { ++ status = "disabled"; ++}; ++ +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq807x-ex447.dts +=================================================================== +--- /dev/null ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq807x-ex447.dts +@@ -0,0 +1,754 @@ ++/dts-v1/; ++/* ++ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
++ */ ++#include "qcom-ipq807x-soc.dtsi" ++#include "qcom-ipq807x-hk-cpu.dtsi" ++ ++/ { ++ #address-cells = <0x2>; ++ #size-cells = <0x2>; ++ model = "TP-Link EX447"; ++ compatible = "tplink,ex447", "qcom,ipq807x"; ++ qcom,msm-id = <0x143 0x0>; ++ interrupt-parent = <&intc>; ++ qcom,board-id = <0x8 0x0>; ++ qcom,pmic-id = <0x0 0x0 0x0 0x0>; ++ ++ aliases { ++ /* ++ * Aliases as required by u-boot ++ * to patch MAC addresses ++ */ ++ ethernet0 = "/soc/dp1"; ++ }; ++ ++ chosen { ++ bootargs = "console=ttyMSM0,115200,n8 root=/dev/ram0 rw \ ++ init=/init"; ++ bootargs-append = " swiotlb=1"; ++ led-boot = &led_power; ++ led-failsafe = &led_power; ++ led-running = &led_power; ++ led-upgrade = &led_power; ++ }; ++}; ++ ++&tlmm { ++ pinctrl-0 = <&btcoex_pins>; ++ pinctrl-names = "default"; ++ ++ btcoex_pins: btcoex_pins { ++ mux_0 { ++ pins = "gpio64"; ++ function = "pta1_1"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ mux_1 { ++ pins = "gpio65"; ++ function = "pta1_2"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ mux_2 { ++ pins = "gpio66"; ++ function = "pta1_0"; ++ drive-strength = <6>; ++ bias-pull-down; ++ }; ++ }; ++ ++ mdio_pins: mdio_pinmux { ++ mux_0 { ++ pins = "gpio68"; ++ function = "mdc"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_1 { ++ pins = "gpio69"; ++ function = "mdio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_2 { ++ pins = "gpio25"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ mux_3 { ++ pins = "gpio37"; ++ function = "gpio"; ++ bias-pull-up; ++ }; ++ }; ++ ++ uart_pins: uart_pins { ++ mux { ++ pins = "gpio23", "gpio24"; ++ function = "blsp4_uart1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ spi_0_pins: spi_0_pins { ++ mux { ++ pins = "gpio38", "gpio39", "gpio40", "gpio41"; ++ function = "blsp0_spi"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ spi_3_pins: spi_3_pins { ++ mux { ++ pins = "gpio52", "gpio53"; ++ function = "blsp3_spi"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ spi_cs { ++ pins = "gpio22"; ++ function = "blsp3_spi2"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ quartz_interrupt { ++ pins = "gpio47"; ++ function = "gpio"; ++ input; ++ bias-disable; ++ }; ++ quartz_reset { ++ pins = "gpio21"; ++ function = "gpio"; ++ output-low; ++ bias-disable; ++ }; ++ }; ++ ++ qpic_pins: qpic_pins { ++ data_0 { ++ pins = "gpio15"; ++ function = "qpic_pad0"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_1 { ++ pins = "gpio12"; ++ function = "qpic_pad1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_2 { ++ pins = "gpio13"; ++ function = "qpic_pad2"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_3 { ++ pins = "gpio14"; ++ function = "qpic_pad3"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_4 { ++ pins = "gpio5"; ++ function = "qpic_pad4"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_5 { ++ pins = "gpio6"; ++ function = "qpic_pad5"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_6 { ++ pins = "gpio7"; ++ function = "qpic_pad6"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_7 { ++ pins = "gpio8"; ++ function = "qpic_pad7"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ qpic_pad { ++ pins = "gpio1", "gpio3", "gpio4", ++ "gpio10", "gpio11", "gpio17"; ++ function = "qpic_pad"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ hsuart_pins: hsuart_pins { ++ mux { ++ pins = "gpio46", "gpio47", "gpio48", "gpio49"; ++ function = "blsp2_uart"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ /* POWER_LED, 
TP-Link */ ++ led_pins: led_pins { ++ led_power { ++ pins = "gpio42"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ /* BUTTON, TP-Link */ ++ button_pins: button_pins { ++ reset_button { ++ pins = "gpio50"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ usb_mux_sel_pins: usb_mux_pins { ++ mux { ++ pins = "gpio27"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ pcie0_pins: pcie_pins { ++ pcie0_rst { ++ pins = "gpio58"; ++ function = "pcie0_rst"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ pcie0_wake { ++ pins = "gpio59"; ++ function = "pcie0_wake"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++}; ++ ++&soc { ++ mdio: mdio@90000 { ++ pinctrl-0 = <&mdio_pins>; ++ pinctrl-names = "default"; ++ phy-reset-gpio = <&tlmm 37 0 &tlmm 25 1>; ++ compatible = "qcom,ipq40xx-mdio", "qcom,qca-mdio"; ++ phy0: ethernet-phy@0 { ++ reg = <0>; ++ }; ++ phy1: ethernet-phy@1 { ++ reg = <1>; ++ }; ++ phy2: ethernet-phy@2 { ++ reg = <2>; ++ }; ++ phy3: ethernet-phy@3 { ++ reg = <3>; ++ }; ++ phy4: ethernet-phy@4 { ++ reg = <28>; ++ }; ++ phy5: ethernet-phy@5 { ++ reg = <4>; ++ }; ++ }; ++ ++ ess-switch@3a000000 { ++ switch_cpu_bmp = <0x1>; /* cpu port bitmap */ ++ switch_lan_bmp = <0x3e>; /* lan port bitmap */ ++ switch_wan_bmp = <0x40>; /* wan port bitmap */ ++ switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ ++ switch_mac_mode1 = <0xf>; /* mac mode for uniphy instance1*/ ++ switch_mac_mode2 = <0xf>; /* mac mode for uniphy instance2*/ ++ bm_tick_mode = <0>; /* bm tick mode */ ++ tm_tick_mode = <0>; /* tm tick mode */ ++ qcom,port_phyinfo { ++ port@0 { ++ port_id = <1>; ++ phy_address = <0>; ++ }; ++ port@1 { ++ port_id = <2>; ++ phy_address = <1>; ++ }; ++ port@2 { ++ port_id = <3>; ++ phy_address = <2>; ++ }; ++ port@3 { ++ port_id = <4>; ++ phy_address = <3>; ++ }; ++ port@4 { ++ port_id = <5>; ++ phy_address = <28>; ++ port_mac_sel = "QGMAC_PORT"; ++ }; ++ port@5 { ++ port_id = <6>; ++ phy_address = <4>; ++ }; ++ }; ++ port_scheduler_resource { ++ port@0 { ++ port_id = <0>; ++ ucast_queue = <0 143>; ++ mcast_queue = <256 271>; ++ l0sp = <0 35>; ++ l0cdrr = <0 47>; ++ l0edrr = <0 47>; ++ l1cdrr = <0 7>; ++ l1edrr = <0 7>; ++ }; ++ port@1 { ++ port_id = <1>; ++ ucast_queue = <144 159>; ++ mcast_queue = <272 275>; ++ l0sp = <36 39>; ++ l0cdrr = <48 63>; ++ l0edrr = <48 63>; ++ l1cdrr = <8 11>; ++ l1edrr = <8 11>; ++ }; ++ port@2 { ++ port_id = <2>; ++ ucast_queue = <160 175>; ++ mcast_queue = <276 279>; ++ l0sp = <40 43>; ++ l0cdrr = <64 79>; ++ l0edrr = <64 79>; ++ l1cdrr = <12 15>; ++ l1edrr = <12 15>; ++ }; ++ port@3 { ++ port_id = <3>; ++ ucast_queue = <176 191>; ++ mcast_queue = <280 283>; ++ l0sp = <44 47>; ++ l0cdrr = <80 95>; ++ l0edrr = <80 95>; ++ l1cdrr = <16 19>; ++ l1edrr = <16 19>; ++ }; ++ port@4 { ++ port_id = <4>; ++ ucast_queue = <192 207>; ++ mcast_queue = <284 287>; ++ l0sp = <48 51>; ++ l0cdrr = <96 111>; ++ l0edrr = <96 111>; ++ l1cdrr = <20 23>; ++ l1edrr = <20 23>; ++ }; ++ port@5 { ++ port_id = <5>; ++ ucast_queue = <208 223>; ++ mcast_queue = <288 291>; ++ l0sp = <52 55>; ++ l0cdrr = <112 127>; ++ l0edrr = <112 127>; ++ l1cdrr = <24 27>; ++ l1edrr = <24 27>; ++ }; ++ port@6 { ++ port_id = <6>; ++ ucast_queue = <224 239>; ++ mcast_queue = <292 295>; ++ l0sp = <56 59>; ++ l0cdrr = <128 143>; ++ l0edrr = <128 143>; ++ l1cdrr = <28 31>; ++ l1edrr = <28 31>; ++ }; ++ port@7 { ++ port_id = <7>; ++ ucast_queue = <240 255>; ++ mcast_queue = <296 299>; ++ 
l0sp = <60 63>; ++ l0cdrr = <144 159>; ++ l0edrr = <144 159>; ++ l1cdrr = <32 35>; ++ l1edrr = <32 35>; ++ }; ++ }; ++ port_scheduler_config { ++ port@0 { ++ port_id = <0>; ++ l1scheduler { ++ group@0 { ++ sp = <0 1>; /*L0 SPs*/ ++ /*cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ /*unicast queues*/ ++ ucast_queue = <0 4 8>; ++ /*multicast queues*/ ++ mcast_queue = <256 260>; ++ /*sp cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0 0>; ++ }; ++ group@1 { ++ ucast_queue = <1 5 9>; ++ mcast_queue = <257 261>; ++ cfg = <0 1 1 1 1>; ++ }; ++ group@2 { ++ ucast_queue = <2 6 10>; ++ mcast_queue = <258 262>; ++ cfg = <0 2 2 2 2>; ++ }; ++ group@3 { ++ ucast_queue = <3 7 11>; ++ mcast_queue = <259 263>; ++ cfg = <0 3 3 3 3>; ++ }; ++ }; ++ }; ++ port@1 { ++ port_id = <1>; ++ l1scheduler { ++ group@0 { ++ sp = <36>; ++ cfg = <0 8 0 8>; ++ }; ++ group@1 { ++ sp = <37>; ++ cfg = <1 9 1 9>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <144>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <272>; ++ mcast_loop_pri = <4>; ++ cfg = <36 0 48 0 48>; ++ }; ++ }; ++ }; ++ port@2 { ++ port_id = <2>; ++ l1scheduler { ++ group@0 { ++ sp = <40>; ++ cfg = <0 12 0 12>; ++ }; ++ group@1 { ++ sp = <41>; ++ cfg = <1 13 1 13>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <160>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <276>; ++ mcast_loop_pri = <4>; ++ cfg = <40 0 64 0 64>; ++ }; ++ }; ++ }; ++ port@3 { ++ port_id = <3>; ++ l1scheduler { ++ group@0 { ++ sp = <44>; ++ cfg = <0 16 0 16>; ++ }; ++ group@1 { ++ sp = <45>; ++ cfg = <1 17 1 17>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <176>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <280>; ++ mcast_loop_pri = <4>; ++ cfg = <44 0 80 0 80>; ++ }; ++ }; ++ }; ++ port@4 { ++ port_id = <4>; ++ l1scheduler { ++ group@0 { ++ sp = <48>; ++ cfg = <0 20 0 20>; ++ }; ++ group@1 { ++ sp = <49>; ++ cfg = <1 21 1 21>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <192>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <284>; ++ mcast_loop_pri = <4>; ++ cfg = <48 0 96 0 96>; ++ }; ++ }; ++ }; ++ port@5 { ++ port_id = <5>; ++ l1scheduler { ++ group@0 { ++ sp = <52>; ++ cfg = <0 24 0 24>; ++ }; ++ group@1 { ++ sp = <53>; ++ cfg = <1 25 1 25>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <208>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <288>; ++ mcast_loop_pri = <4>; ++ cfg = <52 0 112 0 112>; ++ }; ++ }; ++ }; ++ port@6 { ++ port_id = <6>; ++ l1scheduler { ++ group@0 { ++ sp = <56>; ++ cfg = <0 28 0 28>; ++ }; ++ group@1 { ++ sp = <57>; ++ cfg = <1 29 1 29>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <224>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <292>; ++ mcast_loop_pri = <4>; ++ cfg = <56 0 128 0 128>; ++ }; ++ }; ++ }; ++ port@7 { ++ port_id = <7>; ++ l1scheduler { ++ group@0 { ++ sp = <60>; ++ cfg = <0 32 0 32>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <240>; ++ mcast_queue = <296>; ++ cfg = <60 0 144 0 144>; ++ }; ++ }; ++ }; ++ }; ++ }; ++ ++ dp1 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <5>; ++ reg = <0x3a001000 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <28>; ++ phy-mode = "sgmii"; ++ }; ++ ++ /* POWER LED, TP-Link */ ++ leds { ++ compatible = "gpio-leds"; ++ pinctrl-0 = <&led_pins>; ++ pinctrl-names = "default"; ++ ++ led_power: led_power { ++ label = "power:blue"; ++ gpio = <&tlmm 42 GPIO_ACTIVE_HIGH>; ++ default-state = "on"; ++ }; ++ }; ++ 
++ /* BUTTON, TP-Link */ ++ gpio_keys { ++ compatible = "gpio-keys"; ++ pinctrl-0 = <&button_pins>; ++ pinctrl-names = "default"; ++ ++ button@1 { ++ label = "reset"; ++ linux,code = ; ++ gpios = <&tlmm 50 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ }; ++ }; ++ ++ nss-macsec0 { ++ compatible = "qcom,nss-macsec"; ++ phy_addr = <0x18>; ++ phy_access_mode = <0>; ++ mdiobus = <&mdio>; ++ }; ++ nss-macsec1 { ++ compatible = "qcom,nss-macsec"; ++ phy_addr = <0x1c>; ++ phy_access_mode = <0>; ++ mdiobus = <&mdio>; ++ }; ++}; ++ ++&serial_blsp4 { ++ pinctrl-0 = <&uart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&spi_0 { /* BLSP1 QUP1 */ ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ status = "ok"; ++ ++ m25p80@0 { ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0>; ++ compatible = "n25q128a11"; ++ linux,modalias = "m25p80", "n25q128a11"; ++ spi-max-frequency = <50000000>; ++ use-default-sizes; ++ }; ++}; ++ ++&spi_4 { /* BLSP1 QUP3 */ ++ pinctrl-0 = <&spi_3_pins>; ++ pinctrl-names = "default"; ++ cs-select = <2>; ++ quartz-reset-gpio = <&tlmm 21 1>; ++ status = "disabled"; ++ spidev3: spi@3 { ++ compatible = "qca,spidev"; ++ reg = <0>; ++ spi-max-frequency = <24000000>; ++ }; ++}; ++ ++&serial_blsp2 { ++ status = "disabled"; ++}; ++ ++&msm_imem { ++ status = "disabled"; ++}; ++ ++&ssphy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_0 { ++ status = "ok"; ++}; ++ ++&ssphy_1 { ++ status = "ok"; ++}; ++ ++&qusb_phy_1 { ++ status = "ok"; ++}; ++ ++&usb3_0 { ++ status = "ok"; ++}; ++ ++&usb3_1 { ++ status = "ok"; ++}; ++ ++&cryptobam { ++ status = "ok"; ++}; ++ ++&crypto { ++ status = "ok"; ++}; ++ ++&i2c_0 { ++ status = "disabled"; ++}; ++ ++&i2c_1 { ++ status = "disabled"; ++}; ++ ++&qpic_bam { ++ status = "ok"; ++}; ++ ++&nand { ++ pinctrl-0 = <&qpic_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&qpic_lcd { ++ status = "disabled"; ++}; ++ ++&qpic_lcd_panel { ++ status = "disabled"; ++}; ++ ++&ledc { ++ status = "disabled"; ++}; ++ ++&pcie0 { ++ status = "ok"; ++}; ++ ++&pcie1 { ++ status = "disabled"; ++}; ++ diff --git a/target/linux/ipq807x/patches/110-add-esmt-nand.patch b/target/linux/ipq807x/patches/110-add-esmt-nand.patch new file mode 100644 index 0000000000..d47a4d0d16 --- /dev/null +++ b/target/linux/ipq807x/patches/110-add-esmt-nand.patch @@ -0,0 +1,37 @@ +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/mtd/nand/nand_ids.c +=================================================================== +--- linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce.orig/drivers/mtd/nand/nand_ids.c ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/mtd/nand/nand_ids.c +@@ -62,6 +62,12 @@ struct nand_flash_dev nand_flash_ids[] = + {"TH58NYG3S0H 8G 1.8V 8-bit", + { .id = {0x98, 0xa3, 0x91, 0x26} }, + SZ_4K, SZ_1K, SZ_256K, 0, 4, 256, NAND_ECC_INFO(8, SZ_512) }, ++ ++ {"F59D2G81KA 2G 1.8V 8-bit", ++ { .id = {0xc8, 0x5a, 0x90, 0x04} }, ++ SZ_2K, SZ_256, SZ_128K, 0, 4, 128, NAND_ECC_INFO(8, SZ_512) }, ++ ++ + LEGACY_ID_NAND("NAND 4MiB 5V 8-bit", 0x6B, 4, SZ_8K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xE3, 4, SZ_8K, SP_OPTIONS), + LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xE5, 4, SZ_8K, SP_OPTIONS), +@@ -190,6 +196,7 @@ struct nand_manufacturers nand_manuf_ids + {NAND_MFR_SANDISK, "SanDisk"}, + {NAND_MFR_INTEL, "Intel"}, + {NAND_MFR_ATO, "ATO"}, ++ {NAND_MFR_ESMT, "ESMT"}, + {NAND_MFR_GIGA, "GigaDevice"}, + {NAND_MFR_ATO, "ATO"}, + {NAND_MFR_WINBOND, "Winbond"}, 
+Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/include/linux/mtd/nand.h +=================================================================== +--- linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce.orig/include/linux/mtd/nand.h ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/include/linux/mtd/nand.h +@@ -778,6 +778,7 @@ static inline struct mtd_info *nand_to_m + #define NAND_MFR_ATO 0x9b + #define NAND_MFR_WINBOND 0xef + #define NAND_MFR_FIDELIX 0xe5 ++#define NAND_MFR_ESMT 0xc8 + + /* The maximum expected count of bytes in the NAND ID sequence */ + #define NAND_MAX_ID_LEN 8 diff --git a/target/linux/ipq807x/patches/111-eap106.patch b/target/linux/ipq807x/patches/111-eap106.patch new file mode 100644 index 0000000000..497742193e --- /dev/null +++ b/target/linux/ipq807x/patches/111-eap106.patch @@ -0,0 +1,765 @@ +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq807x-eap106.dts +=================================================================== +--- /dev/null ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/arch/arm64/boot/dts/qcom/qcom-ipq807x-eap106.dts +@@ -0,0 +1,716 @@ ++/dts-v1/; ++/* ++ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
++ */ ++#include "qcom-ipq807x-soc.dtsi" ++#include "qcom-ipq807x-hk-cpu.dtsi" ++ ++/ { ++ #address-cells = <0x2>; ++ #size-cells = <0x2>; ++ model = "Edgecore EAP106"; ++ compatible = "edgecore,eap106", "qcom,ipq807x-hk02", "qcom,ipq807x"; ++ qcom,msm-id = <0x143 0x0>; ++ interrupt-parent = <&intc>; ++ qcom,board-id = <0x8 0x0>; ++ qcom,pmic-id = <0x0 0x0 0x0 0x0>; ++ ++ aliases { ++ /* ++ * Aliases as required by u-boot ++ * to patch MAC addresses ++ */ ++ ethernet0 = "/soc/dp1"; ++ ethernet1 = "/soc/dp2"; ++ }; ++ chosen { ++ bootargs = "console=ttyMSM0,115200,n8 root=/dev/ram0 rw init=/init"; ++ #ifdef __IPQ_MEM_PROFILE_256_MB__ ++ bootargs-append = " swiotlb=1"; ++ #else ++ bootargs-append = " swiotlb=1 coherent_pool=2M"; ++ #endif ++ }; ++}; ++ ++&tlmm { ++ leds_pins: leds_pinmux { ++ ++ led1_yellow { ++ pins = "gpio25"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led1_green { ++ pins = "gpio28"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led2_amber { ++ pins = "gpio29"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ led2_blue { ++ pins = "gpio32"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-pull-down; ++ }; ++ }; ++ ++ mdio_pins: mdio_pinmux { ++ mux_0 { ++ pins = "gpio68"; ++ function = "mdc"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ mux_1 { ++ pins = "gpio69"; ++ function = "mdio"; ++ drive-strength = <8>; ++ bias-pull-up; ++ }; ++ }; ++ ++ uart_pins: uart_pins { ++ mux { ++ pins = "gpio23", "gpio24"; ++ function = "blsp4_uart1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ i2c_0_pins: i2c_0_pinmux { ++ mux { ++ pins = "gpio42", "gpio43"; ++ function = "blsp1_i2c"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ spi_0_pins: spi_0_pins { ++ mux { ++ pins = "gpio38", "gpio39", "gpio40", "gpio41"; ++ function = "blsp0_spi"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ qpic_pins: qpic_pins { ++ data_0 { ++ pins = "gpio15"; ++ function = "qpic_pad0"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_1 { ++ pins = "gpio12"; ++ function = "qpic_pad1"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_2 { ++ pins = "gpio13"; ++ function = "qpic_pad2"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_3 { ++ pins = "gpio14"; ++ function = "qpic_pad3"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_4 { ++ pins = "gpio5"; ++ function = "qpic_pad4"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_5 { ++ pins = "gpio6"; ++ function = "qpic_pad5"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_6 { ++ pins = "gpio7"; ++ function = "qpic_pad6"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_7 { ++ pins = "gpio8"; ++ function = "qpic_pad7"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ data_8 { ++ pins = "gpio16"; ++ function = "qpic_pad8"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ qpic_pad { ++ pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4", ++ "gpio9", "gpio10", "gpio11", "gpio17"; ++ function = "qpic_pad"; ++ drive-strength = <8>; ++ bias-disable; ++ }; ++ }; ++ ++ hsuart_pins: hsuart_pins { ++ mux { ++ pins = "gpio46", "gpio47", "gpio48", "gpio49"; ++ function = "blsp2_uart"; ++ drive-strength = <8>; ++ bias-disable; ++ output-low; ++ }; ++ mux_1 { ++ pins = "gpio51"; ++ function = "gpio"; ++ drive-strength = <8>; ++ bias-disable; ++ output-high; ++ }; ++ }; ++ ++ button_pins: button_pins { ++ wps_button { ++ pins = "gpio57"; ++ function = "gpio"; ++ drive-strength = <8>; 
++ bias-pull-up; ++ }; ++ }; ++ ++ uniphy_pins: uniphy_pinmux { ++ mux { ++ pins = "gpio60"; ++ function = "rx2"; ++ bias-disable; ++ }; ++ }; ++ cnss_wlan_en_active: cnss_wlan_en_active { ++ mux { ++ pins = "gpio57"; ++ function = "gpio"; ++ drive-strength = <16>; ++ output-high; ++ bias-pull-up; ++ }; ++ }; ++ ++ cnss_wlan_en_sleep: cnss_wlan_en_sleep { ++ mux { ++ pins = "gpio57"; ++ function = "gpio"; ++ drive-strength = <2>; ++ output-low; ++ bias-pull-down; ++ }; ++ }; ++}; ++ ++&soc { ++ gpio_keys { ++ compatible = "gpio-keys"; ++ pinctrl-0 = <&button_pins>; ++ pinctrl-names = "default"; ++ ++ button@1 { ++ label = "wps"; ++ linux,code = ; ++ gpios = <&tlmm 57 GPIO_ACTIVE_LOW>; ++ linux,input-type = <1>; ++ debounce-interval = <60>; ++ }; ++ }; ++ ++ leds { ++ compatible = "gpio-leds"; ++ pinctrl-0 = <&leds_pins>; ++ pinctrl-names = "default"; ++ ++ led@25 { ++ label = "led1_yellow"; ++ gpios = <&tlmm 25 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "led1_yellow"; ++ default-state = "off"; ++ }; ++ ++ led@28 { ++ label = "led1_green"; ++ gpios = <&tlmm 28 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "led1_green"; ++ default-state = "off"; ++ }; ++ ++ led@29 { ++ label = "led2_amber"; ++ gpios = <&tlmm 29 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "led2_amber"; ++ default-state = "off"; ++ }; ++ ++ led@32 { ++ label = "led2_blue"; ++ gpio = <&tlmm 32 GPIO_ACTIVE_HIGH>; ++ linux,default-trigger = "led2_blue"; ++ default-state = "off"; ++ }; ++ }; ++ ++ mdio@90000 { ++ pinctrl-0 = <&mdio_pins>; ++ pinctrl-names = "default"; ++ phy-reset-gpio = <&tlmm 37 0>; ++ phy0: ethernet-phy@0 { ++ reg = <0>; ++ }; ++ phy1: ethernet-phy@1 { ++ reg = <1>; ++ }; ++ phy2: ethernet-phy@2 { ++ reg = <2>; ++ }; ++ phy3: ethernet-phy@3 { ++ reg = <3>; ++ }; ++ phy4: ethernet-phy@4 { ++ reg = <4>; ++ }; ++ phy5: ethernet-phy@5 { ++ compatible ="ethernet-phy-ieee802.3-c45"; ++ reg = <8>; ++ }; ++ }; ++ ++ ess-switch@3a000000 { ++ pinctrl-0 = <&uniphy_pins>; ++ pinctrl-names = "default"; ++ switch_cpu_bmp = <0x1>; /* cpu port bitmap */ ++ switch_lan_bmp = <0x30>; /* lan port bitmap */ ++ switch_wan_bmp = <0x40>; /* wan port bitmap */ ++ switch_mac_mode = <0x0>; /* mac mode for uniphy instance0*/ ++ switch_mac_mode1 = <0xff>; /* mac mode for uniphy instance1*/ ++ switch_mac_mode2 = <0xd>; /* mac mode for uniphy instance2*/ ++ bm_tick_mode = <0>; /* bm tick mode */ ++ tm_tick_mode = <0>; /* tm tick mode */ ++ port_scheduler_resource { ++ port@0 { ++ port_id = <0>; ++ ucast_queue = <0 143>; ++ mcast_queue = <256 271>; ++ l0sp = <0 35>; ++ l0cdrr = <0 47>; ++ l0edrr = <0 47>; ++ l1cdrr = <0 7>; ++ l1edrr = <0 7>; ++ }; ++ port@1 { ++ port_id = <1>; ++ ucast_queue = <144 159>; ++ mcast_queue = <272 275>; ++ l0sp = <36 39>; ++ l0cdrr = <48 63>; ++ l0edrr = <48 63>; ++ l1cdrr = <8 11>; ++ l1edrr = <8 11>; ++ }; ++ port@2 { ++ port_id = <2>; ++ ucast_queue = <160 175>; ++ mcast_queue = <276 279>; ++ l0sp = <40 43>; ++ l0cdrr = <64 79>; ++ l0edrr = <64 79>; ++ l1cdrr = <12 15>; ++ l1edrr = <12 15>; ++ }; ++ port@3 { ++ port_id = <3>; ++ ucast_queue = <176 191>; ++ mcast_queue = <280 283>; ++ l0sp = <44 47>; ++ l0cdrr = <80 95>; ++ l0edrr = <80 95>; ++ l1cdrr = <16 19>; ++ l1edrr = <16 19>; ++ }; ++ port@4 { ++ port_id = <4>; ++ ucast_queue = <192 207>; ++ mcast_queue = <284 287>; ++ l0sp = <48 51>; ++ l0cdrr = <96 111>; ++ l0edrr = <96 111>; ++ l1cdrr = <20 23>; ++ l1edrr = <20 23>; ++ }; ++ port@5 { ++ port_id = <5>; ++ ucast_queue = <208 223>; ++ mcast_queue = <288 291>; ++ l0sp = <52 55>; ++ l0cdrr = <112 
127>; ++ l0edrr = <112 127>; ++ l1cdrr = <24 27>; ++ l1edrr = <24 27>; ++ }; ++ port@6 { ++ port_id = <6>; ++ ucast_queue = <224 239>; ++ mcast_queue = <292 295>; ++ l0sp = <56 59>; ++ l0cdrr = <128 143>; ++ l0edrr = <128 143>; ++ l1cdrr = <28 31>; ++ l1edrr = <28 31>; ++ }; ++ port@7 { ++ port_id = <7>; ++ ucast_queue = <240 255>; ++ mcast_queue = <296 299>; ++ l0sp = <60 63>; ++ l0cdrr = <144 159>; ++ l0edrr = <144 159>; ++ l1cdrr = <32 35>; ++ l1edrr = <32 35>; ++ }; ++ }; ++ port_scheduler_config { ++ port@0 { ++ port_id = <0>; ++ l1scheduler { ++ group@0 { ++ sp = <0 1>; /*L0 SPs*/ ++ /*cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ /*unicast queues*/ ++ ucast_queue = <0 4 8>; ++ /*multicast queues*/ ++ mcast_queue = <256 260>; ++ /*sp cpri cdrr epri edrr*/ ++ cfg = <0 0 0 0 0>; ++ }; ++ group@1 { ++ ucast_queue = <1 5 9>; ++ mcast_queue = <257 261>; ++ cfg = <0 1 1 1 1>; ++ }; ++ group@2 { ++ ucast_queue = <2 6 10>; ++ mcast_queue = <258 262>; ++ cfg = <0 2 2 2 2>; ++ }; ++ group@3 { ++ ucast_queue = <3 7 11>; ++ mcast_queue = <259 263>; ++ cfg = <0 3 3 3 3>; ++ }; ++ }; ++ }; ++ port@1 { ++ port_id = <1>; ++ l1scheduler { ++ group@0 { ++ sp = <36>; ++ cfg = <0 8 0 8>; ++ }; ++ group@1 { ++ sp = <37>; ++ cfg = <1 9 1 9>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <144>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <272>; ++ mcast_loop_pri = <4>; ++ cfg = <36 0 48 0 48>; ++ }; ++ }; ++ }; ++ port@2 { ++ port_id = <2>; ++ l1scheduler { ++ group@0 { ++ sp = <40>; ++ cfg = <0 12 0 12>; ++ }; ++ group@1 { ++ sp = <41>; ++ cfg = <1 13 1 13>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <160>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <276>; ++ mcast_loop_pri = <4>; ++ cfg = <40 0 64 0 64>; ++ }; ++ }; ++ }; ++ port@3 { ++ port_id = <3>; ++ l1scheduler { ++ group@0 { ++ sp = <44>; ++ cfg = <0 16 0 16>; ++ }; ++ group@1 { ++ sp = <45>; ++ cfg = <1 17 1 17>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <176>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <280>; ++ mcast_loop_pri = <4>; ++ cfg = <44 0 80 0 80>; ++ }; ++ }; ++ }; ++ port@4 { ++ port_id = <4>; ++ l1scheduler { ++ group@0 { ++ sp = <48>; ++ cfg = <0 20 0 20>; ++ }; ++ group@1 { ++ sp = <49>; ++ cfg = <1 21 1 21>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <192>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <284>; ++ mcast_loop_pri = <4>; ++ cfg = <48 0 96 0 96>; ++ }; ++ }; ++ }; ++ port@5 { ++ port_id = <5>; ++ l1scheduler { ++ group@0 { ++ sp = <52>; ++ cfg = <0 24 0 24>; ++ }; ++ group@1 { ++ sp = <53>; ++ cfg = <1 25 1 25>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <208>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <288>; ++ mcast_loop_pri = <4>; ++ cfg = <52 0 112 0 112>; ++ }; ++ }; ++ }; ++ port@6 { ++ port_id = <6>; ++ l1scheduler { ++ group@0 { ++ sp = <56>; ++ cfg = <0 28 0 28>; ++ }; ++ group@1 { ++ sp = <57>; ++ cfg = <1 29 1 29>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <224>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <292>; ++ mcast_loop_pri = <4>; ++ cfg = <56 0 128 0 128>; ++ }; ++ }; ++ }; ++ port@7 { ++ port_id = <7>; ++ l1scheduler { ++ group@0 { ++ sp = <60>; ++ cfg = <0 32 0 32>; ++ }; ++ group@1 { ++ sp = <61>; ++ cfg = <1 33 1 33>; ++ }; ++ }; ++ l0scheduler { ++ group@0 { ++ ucast_queue = <240>; ++ ucast_loop_pri = <16>; ++ mcast_queue = <296>; ++ cfg = <60 0 144 0 144>; ++ }; ++ }; ++ }; ++ }; ++ }; ++ ++ dp1 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ 
qcom,id = <4>; ++ reg = <0x3a001600 0x200>; ++ qcom,mactype = <0>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <3>; ++ phy-mode = "sgmii"; ++ }; ++ ++ dp2 { ++ device_type = "network"; ++ compatible = "qcom,nss-dp"; ++ qcom,id = <6>; ++ reg = <0x3a007000 0x3fff>; ++ qcom,mactype = <1>; ++ local-mac-address = [000000000000]; ++ qcom,link-poll = <1>; ++ qcom,phy-mdio-addr = <8>; ++ phy-mode = "sgmii"; ++ }; ++ wifi3: wifi3@f00000 { ++ compatible = "qcom,cnss-qcn9000"; ++ wlan-en-gpio = <&tlmm 57 0>; ++ pinctrl-names = "wlan_en_active", "wlan_en_sleep"; ++ pinctrl-0 = <&cnss_wlan_en_active>; ++ pinctrl-1 = <&cnss_wlan_en_sleep>; ++ status = "disabled"; ++ }; ++}; ++ ++&serial_blsp4 { ++ pinctrl-0 = <&uart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&spi_0 { /* BLSP1 QUP1 */ ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; ++ cs-select = <0>; ++ status = "ok"; ++ ++ m25p80@0 { ++ #address-cells = <1>; ++ #size-cells = <1>; ++ reg = <0>; ++ compatible = "n25q128a11"; ++ linux,modalias = "m25p80", "n25q128a11"; ++ spi-max-frequency = <50000000>; ++ use-default-sizes; ++ }; ++}; ++ ++&serial_blsp2 { ++ pinctrl-0 = <&hsuart_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&msm_imem { ++ status = "disabled"; ++}; ++ ++&ssphy_0 { ++ status = "ok"; ++}; ++ ++&qusb_phy_0 { ++ status = "ok"; ++}; ++ ++&ssphy_1 { ++ status = "ok"; ++}; ++ ++&qusb_phy_1 { ++ status = "ok"; ++}; ++ ++&usb3_0 { ++ status = "ok"; ++}; ++ ++&usb3_1 { ++ status = "ok"; ++}; ++ ++&cryptobam { ++ status = "ok"; ++}; ++ ++&crypto { ++ status = "ok"; ++}; ++ ++&i2c_0 { ++ pinctrl-0 = <&i2c_0_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&i2c_1 { ++ status = "disabled"; ++}; ++ ++&qpic_bam { ++ status = "ok"; ++}; ++ ++&nand { ++ pinctrl-0 = <&qpic_pins>; ++ pinctrl-names = "default"; ++ status = "ok"; ++}; ++ ++&pcie0 { ++ status = "disabled"; ++}; +Index: linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/net/phy/aquantia.c +=================================================================== +--- linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce.orig/drivers/net/phy/aquantia.c ++++ linux-4.4.60-qsdk-10fd7d14853b7020b804acae690c8acec5d954ce/drivers/net/phy/aquantia.c +@@ -29,6 +29,7 @@ + #define PHY_ID_AQR109 0x03a1b502 + #define PHY_ID_AQR111 0x03a1b610 + #define PHY_ID_AQR111B0 0x03a1b612 ++#define PHY_ID_AQR111C 0x03a1b7e2 + #define PHY_ID_AQR112 0x03a1b660 + #define PHY_ID_AQR113C 0x31c31C10 + #define PHY_ID_AQR112C 0x03a1b792 +@@ -701,6 +702,23 @@ static struct phy_driver aquantia_driver + .driver = { .owner = THIS_MODULE,}, + }, + { ++ .phy_id = PHY_ID_AQR111C, ++ .phy_id_mask = 0xfffffff0, ++ .name = "Aquantia AQR111C", ++ .features = PHY_AQUANTIA_FEATURES, ++ .flags = PHY_HAS_INTERRUPT, ++ .probe = aquantia_phy_probe, ++ .soft_reset = aquantia_soft_reset, ++ .config_init = aquantia_config_init, ++ .aneg_done = aquantia_aneg_done, ++ .config_aneg = aquantia_config_aneg, ++ .config_intr = aquantia_config_intr, ++ .ack_interrupt = aquantia_ack_interrupt, ++ .read_status = aquantia_read_status, ++ .update_link = aquantia_update_link, ++ .driver = { .owner = THIS_MODULE,}, ++}, ++{ + .phy_id = PHY_ID_AQR112, + .phy_id_mask = 0xfffffff0, + .name = "Aquantia AQR112", +@@ -790,6 +808,7 @@ static struct mdio_device_id __maybe_unu + { PHY_ID_AQR109, 0xfffffff0 }, + { PHY_ID_AQR111, 0xfffffff0 }, + { PHY_ID_AQR111B0, 0xfffffff0 }, ++ { PHY_ID_AQR111C, 0xfffffff0 }, + { PHY_ID_AQR112, 0xfffffff0 }, + 
{ PHY_ID_AQR113C, 0xfffffff0 }, + { PHY_ID_AQR112C, 0xfffffff0 }, diff --git a/target/linux/ipq807x/patches/112-pstore.patch b/target/linux/ipq807x/patches/112-pstore.patch new file mode 100644 index 0000000000..dc3960306d --- /dev/null +++ b/target/linux/ipq807x/patches/112-pstore.patch @@ -0,0 +1,147 @@ +Index: linux-4.4.60-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016/arch/arm64/boot/dts/qcom/qcom-ipq6018-memory.dtsi +=================================================================== +--- linux-4.4.60-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016.orig/arch/arm64/boot/dts/qcom/qcom-ipq6018-memory.dtsi ++++ linux-4.4.60-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016/arch/arm64/boot/dts/qcom/qcom-ipq6018-memory.dtsi +@@ -92,6 +92,12 @@ + reg = <0x0 0x40000000 0x0 0x00800000>; + }; + ++ ramoops@4A0f0000 { ++ compatible = "ramoops"; ++ reg = <0 0x4A0f0000 0 0x10000>; ++ record-size = <0x1000>; ++ }; ++ + uboot@4A100000 { + no-map; + reg = <0x0 0x4A100000 0x0 0x00400000>; +@@ -211,6 +217,12 @@ + reg = <0x0 0x40000000 0x0 0x01000000>; + }; + ++ ramoops@4A0f0000 { ++ compatible = "ramoops"; ++ reg = <0 0x4A0f0000 0 0x10000>; ++ record-size = <0x1000>; ++ }; ++ + uboot@4A100000 { + no-map; + reg = <0x0 0x4A100000 0x0 0x00400000>; +@@ -330,6 +342,12 @@ + reg = <0x0 0x40000000 0x0 0x01000000>; + }; + ++ ramoops@4A0f0000 { ++ compatible = "ramoops"; ++ reg = <0 0x4A0f0000 0 0x10000>; ++ record-size = <0x1000>; ++ }; ++ + uboot@4A100000 { + no-map; + reg = <0x0 0x4A100000 0x0 0x00400000>; +Index: linux-4.4.60-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016/fs/pstore/ram.c +=================================================================== +--- linux-4.4.60-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016.orig/fs/pstore/ram.c ++++ linux-4.4.60-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016/fs/pstore/ram.c +@@ -466,15 +466,46 @@ static int ramoops_init_prz(struct devic + return 0; + } + ++static int ramoops_parse_dt(struct platform_device *pdev, ++ struct ramoops_platform_data *pdata) ++{ ++ struct resource *res; ++ ++ dev_dbg(&pdev->dev, "using Device Tree\n"); ++ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!res) { ++ dev_err(&pdev->dev, ++ "failed to locate DT /reserved-memory resource\n"); ++ return -EINVAL; ++ } ++ ++ pdata->mem_size = resource_size(res); ++ pdata->mem_address = res->start; ++ pdata->dump_oops = true; ++ pdata->record_size = 0x1000; ++ return 0; ++} ++ + static int ramoops_probe(struct platform_device *pdev) + { + struct device *dev = &pdev->dev; + struct ramoops_platform_data *pdata = pdev->dev.platform_data; ++ struct ramoops_platform_data pdata_local; + struct ramoops_context *cxt = &oops_cxt; + size_t dump_mem_sz; + phys_addr_t paddr; + int err = -EINVAL; + ++ if (dev_of_node(dev) && !pdata) { ++ pdata = &pdata_local; ++ memset(pdata, 0, sizeof(*pdata)); ++ ++ err = ramoops_parse_dt(pdev, pdata); ++ if (err < 0) ++ goto fail_out; ++ } ++ + /* Only a single ramoops area allowed at a time, so fail extra + * probes. 
+ */ +@@ -603,11 +634,17 @@ static int ramoops_remove(struct platfor + return 0; + } + ++static const struct of_device_id dt_match[] = { ++ { .compatible = "ramoops" }, ++ {} ++}; ++ + static struct platform_driver ramoops_driver = { + .probe = ramoops_probe, + .remove = ramoops_remove, + .driver = { + .name = "ramoops", ++ .of_match_table = dt_match, + }, + }; + +Index: linux-4.4.60-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016/drivers/of/platform.c +=================================================================== +--- linux-4.4.60-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016.orig/drivers/of/platform.c ++++ linux-4.4.60-qsdk-11f09717303ecd83c3a64e9efe23f25921dc1016/drivers/of/platform.c +@@ -53,6 +53,30 @@ struct platform_device *of_find_device_b + } + EXPORT_SYMBOL(of_find_device_by_node); + ++static const struct of_device_id reserved_mem_matches[] = { ++ { .compatible = "ramoops" }, ++ {} ++}; ++ ++static int __init of_platform_default_populate_init(void) ++{ ++ struct device_node *node; ++ ++ if (!of_have_populated_dt()) ++ return -ENODEV; ++ ++ /* ++ * Handle certain compatibles explicitly, since we don't want to create ++ * platform_devices for every node in /reserved-memory with a ++ * "compatible", ++ */ ++ for_each_matching_node(node, reserved_mem_matches) ++ of_platform_device_create(node, NULL, NULL); ++ ++ return 0; ++} ++arch_initcall_sync(of_platform_default_populate_init); ++ + #ifdef CONFIG_OF_ADDRESS + /* + * The following routines scan a subtree and registers a device for diff --git a/target/linux/ipq807x/patches/200-bpf_backport.patch b/target/linux/ipq807x/patches/200-bpf_backport.patch new file mode 100644 index 0000000000..4357369c29 --- /dev/null +++ b/target/linux/ipq807x/patches/200-bpf_backport.patch @@ -0,0 +1,44780 @@ +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -38,7 +38,7 @@ config ARM + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 + select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) + select HAVE_ARCH_TRACEHOOK +- select HAVE_BPF_JIT ++ select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 + select HAVE_CC_STACKPROTECTOR + select HAVE_CONTEXT_TRACKING + select HAVE_C_RECORDMCOUNT +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -1,13 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0-only + /* +- * Just-In-Time compiler for BPF filters on 32bit ARM ++ * Just-In-Time compiler for eBPF filters on 32bit ARM + * ++ * Copyright (c) 2017 Shubham Bansal + * Copyright (c) 2011 Mircea Gherzan +- * +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License as published by the +- * Free Software Foundation; version 2 of the License. + */ + ++#include + #include + #include + #include +@@ -20,51 +19,182 @@ + #include + #include + #include ++#include + + #include "bpf_jit_32.h" + + /* +- * ABI: ++ * eBPF prog stack layout: ++ * ++ * high ++ * original ARM_SP => +-----+ ++ * | | callee saved registers ++ * +-----+ <= (BPF_FP + SCRATCH_SIZE) ++ * | ... | eBPF JIT scratch space ++ * eBPF fp register => +-----+ ++ * (BPF_FP) | ... | eBPF prog stack ++ * +-----+ ++ * |RSVD | JIT scratchpad ++ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) ++ * | | ++ * | ... | Function call stack ++ * | | ++ * +-----+ ++ * low ++ * ++ * The callee saved registers depends on whether frame pointers are enabled. 
++ * With frame pointers (to be compliant with the ABI): ++ * ++ * high ++ * original ARM_SP => +--------------+ \ ++ * | pc | | ++ * current ARM_FP => +--------------+ } callee saved registers ++ * |r4-r9,fp,ip,lr| | ++ * +--------------+ / ++ * low + * +- * r0 scratch register +- * r4 BPF register A +- * r5 BPF register X +- * r6 pointer to the skb +- * r7 skb->data +- * r8 skb_headlen(skb) ++ * Without frame pointers: ++ * ++ * high ++ * original ARM_SP => +--------------+ ++ * | r4-r9,fp,lr | callee saved registers ++ * current ARM_FP => +--------------+ ++ * low ++ * ++ * When popping registers off the stack at the end of a BPF function, we ++ * reference them via the current ARM_FP register. + */ ++#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ ++ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ ++ 1 << ARM_FP) ++#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) ++#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) ++ ++enum { ++ /* Stack layout - these are offsets from (top of stack - 4) */ ++ BPF_R2_HI, ++ BPF_R2_LO, ++ BPF_R3_HI, ++ BPF_R3_LO, ++ BPF_R4_HI, ++ BPF_R4_LO, ++ BPF_R5_HI, ++ BPF_R5_LO, ++ BPF_R7_HI, ++ BPF_R7_LO, ++ BPF_R8_HI, ++ BPF_R8_LO, ++ BPF_R9_HI, ++ BPF_R9_LO, ++ BPF_FP_HI, ++ BPF_FP_LO, ++ BPF_TC_HI, ++ BPF_TC_LO, ++ BPF_AX_HI, ++ BPF_AX_LO, ++ /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, ++ * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, ++ * BPF_REG_FP and Tail call counts. ++ */ ++ BPF_JIT_SCRATCH_REGS, ++}; ++ ++/* ++ * Negative "register" values indicate the register is stored on the stack ++ * and are the offset from the top of the eBPF JIT scratch space. ++ */ ++#define STACK_OFFSET(k) (-4 - (k) * 4) ++#define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) ++ ++#ifdef CONFIG_FRAME_POINTER ++#define EBPF_SCRATCH_TO_ARM_FP(x) ((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4) ++#else ++#define EBPF_SCRATCH_TO_ARM_FP(x) (x) ++#endif + +-#define r_scratch ARM_R0 +-/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */ +-#define r_off ARM_R1 +-#define r_A ARM_R4 +-#define r_X ARM_R5 +-#define r_skb ARM_R6 +-#define r_skb_data ARM_R7 +-#define r_skb_hl ARM_R8 +- +-#define SCRATCH_SP_OFFSET 0 +-#define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k)) +- +-#define SEEN_MEM ((1 << BPF_MEMWORDS) - 1) +-#define SEEN_MEM_WORD(k) (1 << (k)) +-#define SEEN_X (1 << BPF_MEMWORDS) +-#define SEEN_CALL (1 << (BPF_MEMWORDS + 1)) +-#define SEEN_SKB (1 << (BPF_MEMWORDS + 2)) +-#define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) ++#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ ++#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ ++#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ + +-#define FLAG_NEED_X_RESET (1 << 0) +-#define FLAG_IMM_OVERFLOW (1 << 1) ++#define FLAG_IMM_OVERFLOW (1 << 0) ++ ++/* ++ * Map eBPF registers to ARM 32bit registers or stack scratch space. ++ * ++ * 1. First argument is passed using the arm 32bit registers and rest of the ++ * arguments are passed on stack scratch space. ++ * 2. First callee-saved argument is mapped to arm 32 bit registers and rest ++ * arguments are mapped to scratch space on stack. ++ * 3. We need two 64 bit temp registers to do complex operations on eBPF ++ * registers. ++ * ++ * As the eBPF registers are all 64 bit registers and arm has only 32 bit ++ * registers, we have to map each eBPF registers with two arm 32 bit regs or ++ * scratch memory space and we have to build eBPF 64 bit register from those. 
++ * ++ */ ++static const s8 bpf2a32[][2] = { ++ /* return value from in-kernel function, and exit value from eBPF */ ++ [BPF_REG_0] = {ARM_R1, ARM_R0}, ++ /* arguments from eBPF program to in-kernel function */ ++ [BPF_REG_1] = {ARM_R3, ARM_R2}, ++ /* Stored on stack scratch space */ ++ [BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)}, ++ [BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)}, ++ [BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)}, ++ [BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)}, ++ /* callee saved registers that in-kernel function will preserve */ ++ [BPF_REG_6] = {ARM_R5, ARM_R4}, ++ /* Stored on stack scratch space */ ++ [BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)}, ++ [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, ++ [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, ++ /* Read only Frame Pointer to access Stack */ ++ [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, ++ /* Temporary Register for internal BPF JIT, can be used ++ * for constant blindings and others. ++ */ ++ [TMP_REG_1] = {ARM_R7, ARM_R6}, ++ [TMP_REG_2] = {ARM_R9, ARM_R8}, ++ /* Tail call count. Stored on stack scratch space. */ ++ [TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)}, ++ /* temporary register for blinding constants. ++ * Stored on stack scratch space. ++ */ ++ [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, ++}; ++ ++#define dst_lo dst[1] ++#define dst_hi dst[0] ++#define src_lo src[1] ++#define src_hi src[0] ++ ++/* ++ * JIT Context: ++ * ++ * prog : bpf_prog ++ * idx : index of current last JITed instruction. ++ * prologue_bytes : bytes used in prologue. ++ * epilogue_offset : offset of epilogue starting. ++ * offsets : array of eBPF instruction offsets in ++ * JITed code. ++ * target : final JITed code. ++ * epilogue_bytes : no of bytes used in epilogue. ++ * imm_count : no of immediate counts used for global ++ * variables. ++ * imms : array of global variable addresses. 
++ */ + + struct jit_ctx { +- const struct bpf_prog *skf; +- unsigned idx; +- unsigned prologue_bytes; +- int ret0_fp_idx; +- u32 seen; ++ const struct bpf_prog *prog; ++ unsigned int idx; ++ unsigned int prologue_bytes; ++ unsigned int epilogue_offset; ++ unsigned int cpu_architecture; + u32 flags; + u32 *offsets; + u32 *target; ++ u32 stack_size; + #if __LINUX_ARM_ARCH__ < 7 + u16 epilogue_bytes; + u16 imm_count; +@@ -72,68 +202,16 @@ struct jit_ctx { + #endif + }; + +-int bpf_jit_enable __read_mostly; +- +-static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret, +- unsigned int size) +-{ +- void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size); +- +- if (!ptr) +- return -EFAULT; +- memcpy(ret, ptr, size); +- return 0; +-} +- +-static u64 jit_get_skb_b(struct sk_buff *skb, int offset) +-{ +- u8 ret; +- int err; +- +- if (offset < 0) +- err = call_neg_helper(skb, offset, &ret, 1); +- else +- err = skb_copy_bits(skb, offset, &ret, 1); +- +- return (u64)err << 32 | ret; +-} +- +-static u64 jit_get_skb_h(struct sk_buff *skb, int offset) +-{ +- u16 ret; +- int err; +- +- if (offset < 0) +- err = call_neg_helper(skb, offset, &ret, 2); +- else +- err = skb_copy_bits(skb, offset, &ret, 2); +- +- return (u64)err << 32 | ntohs(ret); +-} +- +-static u64 jit_get_skb_w(struct sk_buff *skb, int offset) +-{ +- u32 ret; +- int err; +- +- if (offset < 0) +- err = call_neg_helper(skb, offset, &ret, 4); +- else +- err = skb_copy_bits(skb, offset, &ret, 4); +- +- return (u64)err << 32 | ntohl(ret); +-} +- + /* + * Wrappers which handle both OABI and EABI and assures Thumb2 interworking + * (where the assembly routines like __aeabi_uidiv could cause problems). + */ +-static u32 jit_udiv(u32 dividend, u32 divisor) ++static u32 jit_udiv32(u32 dividend, u32 divisor) + { + return dividend / divisor; + } + +-static u32 jit_mod(u32 dividend, u32 divisor) ++static u32 jit_mod32(u32 dividend, u32 divisor) + { + return dividend % divisor; + } +@@ -157,36 +235,100 @@ static inline void emit(u32 inst, struct + _emit(ARM_COND_AL, inst, ctx); + } + +-static u16 saved_regs(struct jit_ctx *ctx) ++/* ++ * This is rather horrid, but necessary to convert an integer constant ++ * to an immediate operand for the opcodes, and be able to detect at ++ * build time whether the constant can't be converted (iow, usable in ++ * BUILD_BUG_ON()). 
++ */ ++#define imm12val(v, s) (rol32(v, (s)) | (s) << 7) ++#define const_imm8m(x) \ ++ ({ int r; \ ++ u32 v = (x); \ ++ if (!(v & ~0x000000ff)) \ ++ r = imm12val(v, 0); \ ++ else if (!(v & ~0xc000003f)) \ ++ r = imm12val(v, 2); \ ++ else if (!(v & ~0xf000000f)) \ ++ r = imm12val(v, 4); \ ++ else if (!(v & ~0xfc000003)) \ ++ r = imm12val(v, 6); \ ++ else if (!(v & ~0xff000000)) \ ++ r = imm12val(v, 8); \ ++ else if (!(v & ~0x3fc00000)) \ ++ r = imm12val(v, 10); \ ++ else if (!(v & ~0x0ff00000)) \ ++ r = imm12val(v, 12); \ ++ else if (!(v & ~0x03fc0000)) \ ++ r = imm12val(v, 14); \ ++ else if (!(v & ~0x00ff0000)) \ ++ r = imm12val(v, 16); \ ++ else if (!(v & ~0x003fc000)) \ ++ r = imm12val(v, 18); \ ++ else if (!(v & ~0x000ff000)) \ ++ r = imm12val(v, 20); \ ++ else if (!(v & ~0x0003fc00)) \ ++ r = imm12val(v, 22); \ ++ else if (!(v & ~0x0000ff00)) \ ++ r = imm12val(v, 24); \ ++ else if (!(v & ~0x00003fc0)) \ ++ r = imm12val(v, 26); \ ++ else if (!(v & ~0x00000ff0)) \ ++ r = imm12val(v, 28); \ ++ else if (!(v & ~0x000003fc)) \ ++ r = imm12val(v, 30); \ ++ else \ ++ r = -1; \ ++ r; }) ++ ++/* ++ * Checks if immediate value can be converted to imm12(12 bits) value. ++ */ ++static int imm8m(u32 x) + { +- u16 ret = 0; ++ u32 rot; + +- if ((ctx->skf->len > 1) || +- (ctx->skf->insns[0].code == (BPF_RET | BPF_A))) +- ret |= 1 << r_A; ++ for (rot = 0; rot < 16; rot++) ++ if ((x & ~ror32(0xff, 2 * rot)) == 0) ++ return rol32(x, 2 * rot) | (rot << 8); ++ return -1; ++} + +-#ifdef CONFIG_FRAME_POINTER +- ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC); +-#else +- if (ctx->seen & SEEN_CALL) +- ret |= 1 << ARM_LR; +-#endif +- if (ctx->seen & (SEEN_DATA | SEEN_SKB)) +- ret |= 1 << r_skb; +- if (ctx->seen & SEEN_DATA) +- ret |= (1 << r_skb_data) | (1 << r_skb_hl); +- if (ctx->seen & SEEN_X) +- ret |= 1 << r_X; ++#define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x)) + +- return ret; ++static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) ++{ ++ op |= rt << 12 | rn << 16; ++ if (imm12 >= 0) ++ op |= ARM_INST_LDST__U; ++ else ++ imm12 = -imm12; ++ return op | (imm12 & ARM_INST_LDST__IMM12); + } + +-static inline int mem_words_used(struct jit_ctx *ctx) ++static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) + { +- /* yes, we do waste some stack space IF there are "holes" in the set" */ +- return fls(ctx->seen & SEEN_MEM); ++ op |= rt << 12 | rn << 16; ++ if (imm8 >= 0) ++ op |= ARM_INST_LDST__U; ++ else ++ imm8 = -imm8; ++ return op | (imm8 & 0xf0) << 4 | (imm8 & 0x0f); + } + ++#define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) ++#define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) ++#define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) ++#define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) ++ ++#define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) ++#define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) ++#define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) ++#define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) ++ ++/* ++ * Initializes the JIT space with undefined instructions. 
++ */ + static void jit_fill_hole(void *area, unsigned int size) + { + u32 *ptr; +@@ -195,88 +337,23 @@ static void jit_fill_hole(void *area, un + *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); + } + +-static void build_prologue(struct jit_ctx *ctx) +-{ +- u16 reg_set = saved_regs(ctx); +- u16 off; +- +-#ifdef CONFIG_FRAME_POINTER +- emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); +- emit(ARM_PUSH(reg_set), ctx); +- emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); ++#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) ++/* EABI requires the stack to be aligned to 64-bit boundaries */ ++#define STACK_ALIGNMENT 8 + #else +- if (reg_set) +- emit(ARM_PUSH(reg_set), ctx); ++/* Stack must be aligned to 32-bit boundaries */ ++#define STACK_ALIGNMENT 4 + #endif + +- if (ctx->seen & (SEEN_DATA | SEEN_SKB)) +- emit(ARM_MOV_R(r_skb, ARM_R0), ctx); +- +- if (ctx->seen & SEEN_DATA) { +- off = offsetof(struct sk_buff, data); +- emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx); +- /* headlen = len - data_len */ +- off = offsetof(struct sk_buff, len); +- emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx); +- off = offsetof(struct sk_buff, data_len); +- emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); +- emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx); +- } +- +- if (ctx->flags & FLAG_NEED_X_RESET) +- emit(ARM_MOV_I(r_X, 0), ctx); +- +- /* do not leak kernel data to userspace */ +- if (bpf_needs_clear_a(&ctx->skf->insns[0])) +- emit(ARM_MOV_I(r_A, 0), ctx); +- +- /* stack space for the BPF_MEM words */ +- if (ctx->seen & SEEN_MEM) +- emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); +-} +- +-static void build_epilogue(struct jit_ctx *ctx) +-{ +- u16 reg_set = saved_regs(ctx); +- +- if (ctx->seen & SEEN_MEM) +- emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); +- +- reg_set &= ~(1 << ARM_LR); +- +-#ifdef CONFIG_FRAME_POINTER +- /* the first instruction of the prologue was: mov ip, sp */ +- reg_set &= ~(1 << ARM_IP); +- reg_set |= (1 << ARM_SP); +- emit(ARM_LDM(ARM_SP, reg_set), ctx); +-#else +- if (reg_set) { +- if (ctx->seen & SEEN_CALL) +- reg_set |= 1 << ARM_PC; +- emit(ARM_POP(reg_set), ctx); +- } +- +- if (!(ctx->seen & SEEN_CALL)) +- emit(ARM_BX(ARM_LR), ctx); +-#endif +-} +- +-static int16_t imm8m(u32 x) +-{ +- u32 rot; +- +- for (rot = 0; rot < 16; rot++) +- if ((x & ~ror32(0xff, 2 * rot)) == 0) +- return rol32(x, 2 * rot) | (rot << 8); +- +- return -1; +-} ++/* total stack size used in JITed code */ ++#define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE) ++#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) + + #if __LINUX_ARM_ARCH__ < 7 + + static u16 imm_offset(u32 k, struct jit_ctx *ctx) + { +- unsigned i = 0, offset; ++ unsigned int i = 0, offset; + u16 imm; + + /* on the "fake" run we just count them (duplicates included) */ +@@ -295,7 +372,7 @@ static u16 imm_offset(u32 k, struct jit_ + ctx->imms[i] = k; + + /* constants go just after the epilogue */ +- offset = ctx->offsets[ctx->skf->len]; ++ offset = ctx->offsets[ctx->prog->len - 1] * 4; + offset += ctx->prologue_bytes; + offset += ctx->epilogue_bytes; + offset += i * 4; +@@ -319,10 +396,22 @@ static u16 imm_offset(u32 k, struct jit_ + + #endif /* __LINUX_ARM_ARCH__ */ + ++static inline int bpf2a32_offset(int bpf_to, int bpf_from, ++ const struct jit_ctx *ctx) { ++ int to, from; ++ ++ if (ctx->target == NULL) ++ return 0; ++ to = ctx->offsets[bpf_to]; ++ from = ctx->offsets[bpf_from]; ++ ++ return to - from - 1; ++} ++ + /* + * Move an immediate that's not an imm8m to a core register. 
+ */ +-static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx) ++static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx) + { + #if __LINUX_ARM_ARCH__ < 7 + emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); +@@ -333,7 +422,7 @@ static inline void emit_mov_i_no8m(int r + #endif + } + +-static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx) ++static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) + { + int imm12 = imm8m(val); + +@@ -343,676 +432,1508 @@ static inline void emit_mov_i(int rd, u3 + emit_mov_i_no8m(rd, val, ctx); + } + +-#if __LINUX_ARM_ARCH__ < 6 ++static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) ++{ ++ if (elf_hwcap & HWCAP_THUMB) ++ emit(ARM_BX(tgt_reg), ctx); ++ else ++ emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); ++} + +-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) ++static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) + { +- _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx); +- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); +- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx); +- _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx); +- _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx); +- _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx); +- _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx); +- _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx); ++#if __LINUX_ARM_ARCH__ < 5 ++ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); ++ emit_bx_r(tgt_reg, ctx); ++#else ++ emit(ARM_BLX_R(tgt_reg), ctx); ++#endif + } + +-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) ++static inline int epilogue_offset(const struct jit_ctx *ctx) + { +- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); +- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx); +- _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx); ++ int to, from; ++ /* No need for 1st dummy run */ ++ if (ctx->target == NULL) ++ return 0; ++ to = ctx->epilogue_offset; ++ from = ctx->idx; ++ ++ return to - from - 2; + } + +-static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx) ++static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) + { +- /* r_dst = (r_src << 8) | (r_src >> 8) */ +- emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx); +- emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx); ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ ++#if __LINUX_ARM_ARCH__ == 7 ++ if (elf_hwcap & HWCAP_IDIVA) { ++ if (op == BPF_DIV) ++ emit(ARM_UDIV(rd, rm, rn), ctx); ++ else { ++ emit(ARM_UDIV(ARM_IP, rm, rn), ctx); ++ emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx); ++ } ++ return; ++ } ++#endif + + /* +- * we need to mask out the bits set in r_dst[23:16] due to +- * the first shift instruction. +- * +- * note that 0x8ff is the encoded immediate 0x00ff0000. ++ * For BPF_ALU | BPF_DIV | BPF_K instructions ++ * As ARM_R1 and ARM_R0 contains 1st argument of bpf ++ * function, we need to save it on caller side to save ++ * it from getting destroyed within callee. ++ * After the return from the callee, we restore ARM_R0 ++ * ARM_R1. + */ +- emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx); ++ if (rn != ARM_R1) { ++ emit(ARM_MOV_R(tmp[0], ARM_R1), ctx); ++ emit(ARM_MOV_R(ARM_R1, rn), ctx); ++ } ++ if (rm != ARM_R0) { ++ emit(ARM_MOV_R(tmp[1], ARM_R0), ctx); ++ emit(ARM_MOV_R(ARM_R0, rm), ctx); ++ } ++ ++ /* Call appropriate function */ ++ emit_mov_i(ARM_IP, op == BPF_DIV ? 
++ (u32)jit_udiv32 : (u32)jit_mod32, ctx); ++ emit_blx_r(ARM_IP, ctx); ++ ++ /* Save return value */ ++ if (rd != ARM_R0) ++ emit(ARM_MOV_R(rd, ARM_R0), ctx); ++ ++ /* Restore ARM_R0 and ARM_R1 */ ++ if (rn != ARM_R1) ++ emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx); ++ if (rm != ARM_R0) ++ emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); + } + +-#else /* ARMv6+ */ ++/* Is the translated BPF register on stack? */ ++static bool is_stacked(s8 reg) ++{ ++ return reg < 0; ++} + +-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) ++/* If a BPF register is on the stack (stk is true), load it to the ++ * supplied temporary register and return the temporary register ++ * for subsequent operations, otherwise just use the CPU register. ++ */ ++static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) + { +- _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx); +-#ifdef __LITTLE_ENDIAN +- _emit(cond, ARM_REV(r_res, r_res), ctx); +-#endif ++ if (is_stacked(reg)) { ++ emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); ++ reg = tmp; ++ } ++ return reg; + } + +-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) ++static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, ++ struct jit_ctx *ctx) + { +- _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx); +-#ifdef __LITTLE_ENDIAN +- _emit(cond, ARM_REV16(r_res, r_res), ctx); +-#endif ++ if (is_stacked(reg[1])) { ++ if (__LINUX_ARM_ARCH__ >= 6 || ++ ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { ++ emit(ARM_LDRD_I(tmp[1], ARM_FP, ++ EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); ++ } else { ++ emit(ARM_LDR_I(tmp[1], ARM_FP, ++ EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); ++ emit(ARM_LDR_I(tmp[0], ARM_FP, ++ EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); ++ } ++ reg = tmp; ++ } ++ return reg; + } + +-static inline void emit_swap16(u8 r_dst __maybe_unused, +- u8 r_src __maybe_unused, +- struct jit_ctx *ctx __maybe_unused) ++/* If a BPF register is on the stack (stk is true), save the register ++ * back to the stack. If the source register is not the same, then ++ * move it into the correct register. ++ */ ++static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) + { +-#ifdef __LITTLE_ENDIAN +- emit(ARM_REV16(r_dst, r_src), ctx); +-#endif ++ if (is_stacked(reg)) ++ emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); ++ else if (reg != src) ++ emit(ARM_MOV_R(reg, src), ctx); ++} ++ ++static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, ++ struct jit_ctx *ctx) ++{ ++ if (is_stacked(reg[1])) { ++ if (__LINUX_ARM_ARCH__ >= 6 || ++ ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { ++ emit(ARM_STRD_I(src[1], ARM_FP, ++ EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); ++ } else { ++ emit(ARM_STR_I(src[1], ARM_FP, ++ EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); ++ emit(ARM_STR_I(src[0], ARM_FP, ++ EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); ++ } ++ } else { ++ if (reg[1] != src[1]) ++ emit(ARM_MOV_R(reg[1], src[1]), ctx); ++ if (reg[0] != src[0]) ++ emit(ARM_MOV_R(reg[0], src[0]), ctx); ++ } + } + +-#endif /* __LINUX_ARM_ARCH__ < 6 */ ++static inline void emit_a32_mov_i(const s8 dst, const u32 val, ++ struct jit_ctx *ctx) ++{ ++ const s8 *tmp = bpf2a32[TMP_REG_1]; + ++ if (is_stacked(dst)) { ++ emit_mov_i(tmp[1], val, ctx); ++ arm_bpf_put_reg32(dst, tmp[1], ctx); ++ } else { ++ emit_mov_i(dst, val, ctx); ++ } ++} + +-/* Compute the immediate value for a PC-relative branch. 
*/ +-static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx) ++static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx) + { +- u32 imm; ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *rd = is_stacked(dst_lo) ? tmp : dst; + +- if (ctx->target == NULL) +- return 0; +- /* +- * BPF allows only forward jumps and the offset of the target is +- * still the one computed during the first pass. ++ emit_mov_i(rd[1], (u32)val, ctx); ++ emit_mov_i(rd[0], val >> 32, ctx); ++ ++ arm_bpf_put_reg64(dst, rd, ctx); ++} ++ ++/* Sign extended move */ ++static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[], ++ const u32 val, struct jit_ctx *ctx) { ++ u64 val64 = val; ++ ++ if (is64 && (val & (1<<31))) ++ val64 |= 0xffffffff00000000ULL; ++ emit_a32_mov_i64(dst, val64, ctx); ++} ++ ++static inline void emit_a32_add_r(const u8 dst, const u8 src, ++ const bool is64, const bool hi, ++ struct jit_ctx *ctx) { ++ /* 64 bit : ++ * adds dst_lo, dst_lo, src_lo ++ * adc dst_hi, dst_hi, src_hi ++ * 32 bit : ++ * add dst_lo, dst_lo, src_lo + */ +- imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8); ++ if (!hi && is64) ++ emit(ARM_ADDS_R(dst, dst, src), ctx); ++ else if (hi && is64) ++ emit(ARM_ADC_R(dst, dst, src), ctx); ++ else ++ emit(ARM_ADD_R(dst, dst, src), ctx); ++} + +- return imm >> 2; ++static inline void emit_a32_sub_r(const u8 dst, const u8 src, ++ const bool is64, const bool hi, ++ struct jit_ctx *ctx) { ++ /* 64 bit : ++ * subs dst_lo, dst_lo, src_lo ++ * sbc dst_hi, dst_hi, src_hi ++ * 32 bit : ++ * sub dst_lo, dst_lo, src_lo ++ */ ++ if (!hi && is64) ++ emit(ARM_SUBS_R(dst, dst, src), ctx); ++ else if (hi && is64) ++ emit(ARM_SBC_R(dst, dst, src), ctx); ++ else ++ emit(ARM_SUB_R(dst, dst, src), ctx); ++} ++ ++static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, ++ const bool hi, const u8 op, struct jit_ctx *ctx){ ++ switch (BPF_OP(op)) { ++ /* dst = dst + src */ ++ case BPF_ADD: ++ emit_a32_add_r(dst, src, is64, hi, ctx); ++ break; ++ /* dst = dst - src */ ++ case BPF_SUB: ++ emit_a32_sub_r(dst, src, is64, hi, ctx); ++ break; ++ /* dst = dst | src */ ++ case BPF_OR: ++ emit(ARM_ORR_R(dst, dst, src), ctx); ++ break; ++ /* dst = dst & src */ ++ case BPF_AND: ++ emit(ARM_AND_R(dst, dst, src), ctx); ++ break; ++ /* dst = dst ^ src */ ++ case BPF_XOR: ++ emit(ARM_EOR_R(dst, dst, src), ctx); ++ break; ++ /* dst = dst * src */ ++ case BPF_MUL: ++ emit(ARM_MUL(dst, dst, src), ctx); ++ break; ++ /* dst = dst << src */ ++ case BPF_LSH: ++ emit(ARM_LSL_R(dst, dst, src), ctx); ++ break; ++ /* dst = dst >> src */ ++ case BPF_RSH: ++ emit(ARM_LSR_R(dst, dst, src), ctx); ++ break; ++ /* dst = dst >> src (signed)*/ ++ case BPF_ARSH: ++ emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx); ++ break; ++ } + } + +-#define OP_IMM3(op, r1, r2, imm_val, ctx) \ +- do { \ +- imm12 = imm8m(imm_val); \ +- if (imm12 < 0) { \ +- emit_mov_i_no8m(r_scratch, imm_val, ctx); \ +- emit(op ## _R((r1), (r2), r_scratch), ctx); \ +- } else { \ +- emit(op ## _I((r1), (r2), imm12), ctx); \ +- } \ +- } while (0) +- +-static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx) +-{ +- if (ctx->ret0_fp_idx >= 0) { +- _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx); +- /* NOP to keep the size constant between passes */ +- emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx); ++/* ALU operation (32 bit) ++ * dst = dst (op) src ++ */ ++static inline void emit_a32_alu_r(const s8 dst, const s8 src, ++ struct jit_ctx *ctx, const bool is64, ++ const bool hi, const u8 op) { ++ const s8 *tmp = 
bpf2a32[TMP_REG_1]; ++ s8 rn, rd; ++ ++ rn = arm_bpf_get_reg32(src, tmp[1], ctx); ++ rd = arm_bpf_get_reg32(dst, tmp[0], ctx); ++ /* ALU operation */ ++ emit_alu_r(rd, rn, is64, hi, op, ctx); ++ arm_bpf_put_reg32(dst, rd, ctx); ++} ++ ++/* ALU operation (64 bit) */ ++static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], ++ const s8 src[], struct jit_ctx *ctx, ++ const u8 op) { ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *rd; ++ ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ if (is64) { ++ const s8 *rs; ++ ++ rs = arm_bpf_get_reg64(src, tmp2, ctx); ++ ++ /* ALU operation */ ++ emit_alu_r(rd[1], rs[1], true, false, op, ctx); ++ emit_alu_r(rd[0], rs[0], true, true, op, ctx); + } else { +- _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx); +- _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx); ++ s8 rs; ++ ++ rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); ++ ++ /* ALU operation */ ++ emit_alu_r(rd[1], rs, true, false, op, ctx); ++ if (!ctx->prog->aux->verifier_zext) ++ emit_a32_mov_i(rd[0], 0, ctx); + } ++ ++ arm_bpf_put_reg64(dst, rd, ctx); + } + +-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) +-{ +-#if __LINUX_ARM_ARCH__ < 5 +- emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); ++/* dst = src (4 bytes)*/ ++static inline void emit_a32_mov_r(const s8 dst, const s8 src, ++ struct jit_ctx *ctx) { ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ s8 rt; ++ ++ rt = arm_bpf_get_reg32(src, tmp[0], ctx); ++ arm_bpf_put_reg32(dst, rt, ctx); ++} ++ ++/* dst = src */ ++static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], ++ const s8 src[], ++ struct jit_ctx *ctx) { ++ if (!is64) { ++ emit_a32_mov_r(dst_lo, src_lo, ctx); ++ if (!ctx->prog->aux->verifier_zext) ++ /* Zero out high 4 bytes */ ++ emit_a32_mov_i(dst_hi, 0, ctx); ++ } else if (__LINUX_ARM_ARCH__ < 6 && ++ ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { ++ /* complete 8 byte move */ ++ emit_a32_mov_r(dst_lo, src_lo, ctx); ++ emit_a32_mov_r(dst_hi, src_hi, ctx); ++ } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { ++ const u8 *tmp = bpf2a32[TMP_REG_1]; ++ ++ emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); ++ emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); ++ } else if (is_stacked(src_lo)) { ++ emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); ++ } else if (is_stacked(dst_lo)) { ++ emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); ++ } else { ++ emit(ARM_MOV_R(dst[0], src[0]), ctx); ++ emit(ARM_MOV_R(dst[1], src[1]), ctx); ++ } ++} + +- if (elf_hwcap & HWCAP_THUMB) +- emit(ARM_BX(tgt_reg), ctx); +- else +- emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); +-#else +- emit(ARM_BLX_R(tgt_reg), ctx); +-#endif ++/* Shift operations */ ++static inline void emit_a32_alu_i(const s8 dst, const u32 val, ++ struct jit_ctx *ctx, const u8 op) { ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ s8 rd; ++ ++ rd = arm_bpf_get_reg32(dst, tmp[0], ctx); ++ ++ /* Do shift operation */ ++ switch (op) { ++ case BPF_LSH: ++ emit(ARM_LSL_I(rd, rd, val), ctx); ++ break; ++ case BPF_RSH: ++ emit(ARM_LSR_I(rd, rd, val), ctx); ++ break; ++ case BPF_NEG: ++ emit(ARM_RSB_I(rd, rd, val), ctx); ++ break; ++ } ++ ++ arm_bpf_put_reg32(dst, rd, ctx); + } + +-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, +- int bpf_op) +-{ +-#if __LINUX_ARM_ARCH__ == 7 +- if (elf_hwcap & HWCAP_IDIVA) { +- if (bpf_op == BPF_DIV) +- emit(ARM_UDIV(rd, rm, rn), ctx); +- else { +- emit(ARM_UDIV(ARM_R3, rm, rn), ctx); +- emit(ARM_MLS(rd, rn, ARM_R3, 
rm), ctx); ++/* dst = ~dst (64 bit) */ ++static inline void emit_a32_neg64(const s8 dst[], ++ struct jit_ctx *ctx){ ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *rd; ++ ++ /* Setup Operand */ ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ ++ /* Do Negate Operation */ ++ emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx); ++ emit(ARM_RSC_I(rd[0], rd[0], 0), ctx); ++ ++ arm_bpf_put_reg64(dst, rd, ctx); ++} ++ ++/* dst = dst << src */ ++static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], ++ struct jit_ctx *ctx) { ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *rd; ++ s8 rt; ++ ++ /* Setup Operands */ ++ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ ++ /* Do LSH operation */ ++ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); ++ emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); ++ emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx); ++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx); ++ emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx); ++ emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx); ++ ++ arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); ++ arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); ++} ++ ++/* dst = dst >> src (signed)*/ ++static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], ++ struct jit_ctx *ctx) { ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *rd; ++ s8 rt; ++ ++ /* Setup Operands */ ++ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ ++ /* Do the ARSH operation */ ++ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); ++ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); ++ emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); ++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); ++ _emit(ARM_COND_MI, ARM_B(0), ctx); ++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); ++ emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx); ++ ++ arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); ++ arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); ++} ++ ++/* dst = dst >> src */ ++static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], ++ struct jit_ctx *ctx) { ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *rd; ++ s8 rt; ++ ++ /* Setup Operands */ ++ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ ++ /* Do RSH operation */ ++ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); ++ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); ++ emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); ++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); ++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx); ++ emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx); ++ ++ arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); ++ arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); ++} ++ ++/* dst = dst << val */ ++static inline void emit_a32_lsh_i64(const s8 dst[], ++ const u32 val, struct jit_ctx *ctx){ ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *rd; ++ ++ /* Setup operands */ ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ ++ /* Do LSH operation */ ++ if (val < 32) { ++ emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx); ++ emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx); ++ emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx); ++ } else { ++ if (val == 32) ++ emit(ARM_MOV_R(rd[0], rd[1]), ctx); ++ else ++ emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx); ++ 
emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx); ++ } ++ ++ arm_bpf_put_reg64(dst, rd, ctx); ++} ++ ++/* dst = dst >> val */ ++static inline void emit_a32_rsh_i64(const s8 dst[], ++ const u32 val, struct jit_ctx *ctx) { ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *rd; ++ ++ /* Setup operands */ ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ ++ /* Do LSR operation */ ++ if (val == 0) { ++ /* An immediate value of 0 encodes a shift amount of 32 ++ * for LSR. To shift by 0, don't do anything. ++ */ ++ } else if (val < 32) { ++ emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); ++ emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); ++ emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); ++ } else if (val == 32) { ++ emit(ARM_MOV_R(rd[1], rd[0]), ctx); ++ emit(ARM_MOV_I(rd[0], 0), ctx); ++ } else { ++ emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx); ++ emit(ARM_MOV_I(rd[0], 0), ctx); ++ } ++ ++ arm_bpf_put_reg64(dst, rd, ctx); ++} ++ ++/* dst = dst >> val (signed) */ ++static inline void emit_a32_arsh_i64(const s8 dst[], ++ const u32 val, struct jit_ctx *ctx){ ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *rd; ++ ++ /* Setup operands */ ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ ++ /* Do ARSH operation */ ++ if (val == 0) { ++ /* An immediate value of 0 encodes a shift amount of 32 ++ * for ASR. To shift by 0, don't do anything. ++ */ ++ } else if (val < 32) { ++ emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); ++ emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); ++ emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); ++ } else if (val == 32) { ++ emit(ARM_MOV_R(rd[1], rd[0]), ctx); ++ emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); ++ } else { ++ emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx); ++ emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); ++ } ++ ++ arm_bpf_put_reg64(dst, rd, ctx); ++} ++ ++static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], ++ struct jit_ctx *ctx) { ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *rd, *rt; ++ ++ /* Setup operands for multiplication */ ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ rt = arm_bpf_get_reg64(src, tmp2, ctx); ++ ++ /* Do Multiplication */ ++ emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx); ++ emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx); ++ emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); ++ ++ emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx); ++ emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx); ++ ++ arm_bpf_put_reg32(dst_lo, ARM_IP, ctx); ++ arm_bpf_put_reg32(dst_hi, rd[0], ctx); ++} ++ ++static bool is_ldst_imm(s16 off, const u8 size) ++{ ++ s16 off_max = 0; ++ ++ switch (size) { ++ case BPF_B: ++ case BPF_W: ++ off_max = 0xfff; ++ break; ++ case BPF_H: ++ off_max = 0xff; ++ break; ++ case BPF_DW: ++ /* Need to make sure off+4 does not overflow. 
*/ ++ off_max = 0xfff - 4; ++ break; ++ } ++ return -off_max <= off && off <= off_max; ++} ++ ++/* *(size *)(dst + off) = src */ ++static inline void emit_str_r(const s8 dst, const s8 src[], ++ s16 off, struct jit_ctx *ctx, const u8 sz){ ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ s8 rd; ++ ++ rd = arm_bpf_get_reg32(dst, tmp[1], ctx); ++ ++ if (!is_ldst_imm(off, sz)) { ++ emit_a32_mov_i(tmp[0], off, ctx); ++ emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); ++ rd = tmp[0]; ++ off = 0; ++ } ++ switch (sz) { ++ case BPF_B: ++ /* Store a Byte */ ++ emit(ARM_STRB_I(src_lo, rd, off), ctx); ++ break; ++ case BPF_H: ++ /* Store a HalfWord */ ++ emit(ARM_STRH_I(src_lo, rd, off), ctx); ++ break; ++ case BPF_W: ++ /* Store a Word */ ++ emit(ARM_STR_I(src_lo, rd, off), ctx); ++ break; ++ case BPF_DW: ++ /* Store a Double Word */ ++ emit(ARM_STR_I(src_lo, rd, off), ctx); ++ emit(ARM_STR_I(src_hi, rd, off + 4), ctx); ++ break; ++ } ++} ++ ++/* dst = *(size*)(src + off) */ ++static inline void emit_ldx_r(const s8 dst[], const s8 src, ++ s16 off, struct jit_ctx *ctx, const u8 sz){ ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *rd = is_stacked(dst_lo) ? tmp : dst; ++ s8 rm = src; ++ ++ if (!is_ldst_imm(off, sz)) { ++ emit_a32_mov_i(tmp[0], off, ctx); ++ emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); ++ rm = tmp[0]; ++ off = 0; ++ } else if (rd[1] == rm) { ++ emit(ARM_MOV_R(tmp[0], rm), ctx); ++ rm = tmp[0]; ++ } ++ switch (sz) { ++ case BPF_B: ++ /* Load a Byte */ ++ emit(ARM_LDRB_I(rd[1], rm, off), ctx); ++ if (!ctx->prog->aux->verifier_zext) ++ emit_a32_mov_i(rd[0], 0, ctx); ++ break; ++ case BPF_H: ++ /* Load a HalfWord */ ++ emit(ARM_LDRH_I(rd[1], rm, off), ctx); ++ if (!ctx->prog->aux->verifier_zext) ++ emit_a32_mov_i(rd[0], 0, ctx); ++ break; ++ case BPF_W: ++ /* Load a Word */ ++ emit(ARM_LDR_I(rd[1], rm, off), ctx); ++ if (!ctx->prog->aux->verifier_zext) ++ emit_a32_mov_i(rd[0], 0, ctx); ++ break; ++ case BPF_DW: ++ /* Load a Double Word */ ++ emit(ARM_LDR_I(rd[1], rm, off), ctx); ++ emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); ++ break; ++ } ++ arm_bpf_put_reg64(dst, rd, ctx); ++} ++ ++/* Arithmatic Operation */ ++static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, ++ const u8 rn, struct jit_ctx *ctx, u8 op, ++ bool is_jmp64) { ++ switch (op) { ++ case BPF_JSET: ++ if (is_jmp64) { ++ emit(ARM_AND_R(ARM_IP, rt, rn), ctx); ++ emit(ARM_AND_R(ARM_LR, rd, rm), ctx); ++ emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); ++ } else { ++ emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx); + } +- return; ++ break; ++ case BPF_JEQ: ++ case BPF_JNE: ++ case BPF_JGT: ++ case BPF_JGE: ++ case BPF_JLE: ++ case BPF_JLT: ++ if (is_jmp64) { ++ emit(ARM_CMP_R(rd, rm), ctx); ++ /* Only compare low halve if high halve are equal. */ ++ _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx); ++ } else { ++ emit(ARM_CMP_R(rt, rn), ctx); ++ } ++ break; ++ case BPF_JSLE: ++ case BPF_JSGT: ++ emit(ARM_CMP_R(rn, rt), ctx); ++ if (is_jmp64) ++ emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx); ++ break; ++ case BPF_JSLT: ++ case BPF_JSGE: ++ emit(ARM_CMP_R(rt, rn), ctx); ++ if (is_jmp64) ++ emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx); ++ break; + } +-#endif ++} + +- /* +- * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4 +- * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into +- * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm +- * before using it as a source for ARM_R1. +- * +- * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is +- * ARM_R5 (r_X) so there is no particular register overlap +- * issues. 
++static int out_offset = -1; /* initialized on the first pass of build_body() */ ++static int emit_bpf_tail_call(struct jit_ctx *ctx) ++{ ++ ++ /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ ++ const s8 *r2 = bpf2a32[BPF_REG_2]; ++ const s8 *r3 = bpf2a32[BPF_REG_3]; ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *tcc = bpf2a32[TCALL_CNT]; ++ const s8 *tc; ++ const int idx0 = ctx->idx; ++#define cur_offset (ctx->idx - idx0) ++#define jmp_offset (out_offset - (cur_offset) - 2) ++ u32 lo, hi; ++ s8 r_array, r_index; ++ int off; ++ ++ /* if (index >= array->map.max_entries) ++ * goto out; + */ +- if (rn != ARM_R1) +- emit(ARM_MOV_R(ARM_R1, rn), ctx); +- if (rm != ARM_R0) +- emit(ARM_MOV_R(ARM_R0, rm), ctx); ++ BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) > ++ ARM_INST_LDST__IMM12); ++ off = offsetof(struct bpf_array, map.max_entries); ++ r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); ++ /* index is 32-bit for arrays */ ++ r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); ++ /* array->map.max_entries */ ++ emit(ARM_LDR_I(tmp[1], r_array, off), ctx); ++ /* index >= array->map.max_entries */ ++ emit(ARM_CMP_R(r_index, tmp[1]), ctx); ++ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); ++ ++ /* tmp2[0] = array, tmp2[1] = index */ ++ ++ /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) ++ * goto out; ++ * tail_call_cnt++; ++ */ ++ lo = (u32)MAX_TAIL_CALL_CNT; ++ hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); ++ tc = arm_bpf_get_reg64(tcc, tmp, ctx); ++ emit(ARM_CMP_I(tc[0], hi), ctx); ++ _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx); ++ _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); ++ emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx); ++ emit(ARM_ADC_I(tc[0], tc[0], 0), ctx); ++ arm_bpf_put_reg64(tcc, tmp, ctx); ++ ++ /* prog = array->ptrs[index] ++ * if (prog == NULL) ++ * goto out; ++ */ ++ BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); ++ off = imm8m(offsetof(struct bpf_array, ptrs)); ++ emit(ARM_ADD_I(tmp[1], r_array, off), ctx); ++ emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); ++ emit(ARM_CMP_I(tmp[1], 0), ctx); ++ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); ++ ++ /* goto *(prog->bpf_func + prologue_size); */ ++ BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) > ++ ARM_INST_LDST__IMM12); ++ off = offsetof(struct bpf_prog, bpf_func); ++ emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx); ++ emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); ++ emit_bx_r(tmp[1], ctx); ++ ++ /* out: */ ++ if (out_offset == -1) ++ out_offset = cur_offset; ++ if (cur_offset != out_offset) { ++ pr_err_once("tail_call out_offset = %d, expected %d!\n", ++ cur_offset, out_offset); ++ return -1; ++ } ++ return 0; ++#undef cur_offset ++#undef jmp_offset ++} ++ ++/* 0xabcd => 0xcdab */ ++static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) ++{ ++#if __LINUX_ARM_ARCH__ < 6 ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; + +- ctx->seen |= SEEN_CALL; +- emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? 
(u32)jit_udiv : (u32)jit_mod, +- ctx); +- emit_blx_r(ARM_R3, ctx); ++ emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); ++ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx); ++ emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); ++ emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx); ++#else /* ARMv6+ */ ++ emit(ARM_REV16(rd, rn), ctx); ++#endif ++} + +- if (rd != ARM_R0) +- emit(ARM_MOV_R(rd, ARM_R0), ctx); ++/* 0xabcdefgh => 0xghefcdab */ ++static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) ++{ ++#if __LINUX_ARM_ARCH__ < 6 ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ ++ emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); ++ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx); ++ emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx); ++ ++ emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx); ++ emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx); ++ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx); ++ emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); ++ emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx); ++ emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx); ++ emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx); ++ ++#else /* ARMv6+ */ ++ emit(ARM_REV(rd, rn), ctx); ++#endif + } + +-static inline void update_on_xread(struct jit_ctx *ctx) ++// push the scratch stack register on top of the stack ++static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx) + { +- if (!(ctx->seen & SEEN_X)) +- ctx->flags |= FLAG_NEED_X_RESET; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s8 *rt; ++ u16 reg_set = 0; ++ ++ rt = arm_bpf_get_reg64(src, tmp2, ctx); + +- ctx->seen |= SEEN_X; ++ reg_set = (1 << rt[1]) | (1 << rt[0]); ++ emit(ARM_PUSH(reg_set), ctx); + } + +-static int build_body(struct jit_ctx *ctx) ++static void build_prologue(struct jit_ctx *ctx) + { +- void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; +- const struct bpf_prog *prog = ctx->skf; +- const struct sock_filter *inst; +- unsigned i, load_order, off, condt; +- int imm12; +- u32 k; ++ const s8 r0 = bpf2a32[BPF_REG_0][1]; ++ const s8 r2 = bpf2a32[BPF_REG_1][1]; ++ const s8 r3 = bpf2a32[BPF_REG_1][0]; ++ const s8 r4 = bpf2a32[BPF_REG_6][1]; ++ const s8 fplo = bpf2a32[BPF_REG_FP][1]; ++ const s8 fphi = bpf2a32[BPF_REG_FP][0]; ++ const s8 *tcc = bpf2a32[TCALL_CNT]; + +- for (i = 0; i < prog->len; i++) { +- u16 code; ++ /* Save callee saved registers. 
*/ ++#ifdef CONFIG_FRAME_POINTER ++ u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC; ++ emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); ++ emit(ARM_PUSH(reg_set), ctx); ++ emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); ++#else ++ emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx); ++ emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx); ++#endif ++ /* Save frame pointer for later */ ++ emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx); + +- inst = &(prog->insns[i]); +- /* K as an immediate value operand */ +- k = inst->k; +- code = bpf_anc_helper(inst); ++ ctx->stack_size = imm8m(STACK_SIZE); + +- /* compute offsets only in the fake pass */ +- if (ctx->target == NULL) +- ctx->offsets[i] = ctx->idx * 4; ++ /* Set up function call stack */ ++ emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); + +- switch (code) { +- case BPF_LD | BPF_IMM: +- emit_mov_i(r_A, k, ctx); +- break; +- case BPF_LD | BPF_W | BPF_LEN: +- ctx->seen |= SEEN_SKB; +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); +- emit(ARM_LDR_I(r_A, r_skb, +- offsetof(struct sk_buff, len)), ctx); +- break; +- case BPF_LD | BPF_MEM: +- /* A = scratch[k] */ +- ctx->seen |= SEEN_MEM_WORD(k); +- emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); +- break; +- case BPF_LD | BPF_W | BPF_ABS: +- load_order = 2; +- goto load; +- case BPF_LD | BPF_H | BPF_ABS: +- load_order = 1; +- goto load; +- case BPF_LD | BPF_B | BPF_ABS: +- load_order = 0; +-load: +- emit_mov_i(r_off, k, ctx); +-load_common: +- ctx->seen |= SEEN_DATA | SEEN_CALL; +- +- if (load_order > 0) { +- emit(ARM_SUB_I(r_scratch, r_skb_hl, +- 1 << load_order), ctx); +- emit(ARM_CMP_R(r_scratch, r_off), ctx); +- condt = ARM_COND_GE; +- } else { +- emit(ARM_CMP_R(r_skb_hl, r_off), ctx); +- condt = ARM_COND_HI; +- } ++ /* Set up BPF prog stack base register */ ++ emit_a32_mov_r(fplo, ARM_IP, ctx); ++ emit_a32_mov_i(fphi, 0, ctx); + +- /* +- * test for negative offset, only if we are +- * currently scheduled to take the fast +- * path. this will update the flags so that +- * the slowpath instruction are ignored if the +- * offset is negative. +- * +- * for loard_order == 0 the HI condition will +- * make loads at offset 0 take the slow path too. 
+- */ +- _emit(condt, ARM_CMP_I(r_off, 0), ctx); ++ /* mov r4, 0 */ ++ emit(ARM_MOV_I(r4, 0), ctx); + +- _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data), +- ctx); ++ /* Move BPF_CTX to BPF_R1 */ ++ emit(ARM_MOV_R(r3, r4), ctx); ++ emit(ARM_MOV_R(r2, r0), ctx); ++ /* Initialize Tail Count */ ++ emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx); ++ emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx); ++ /* end of prologue */ ++} + +- if (load_order == 0) +- _emit(condt, ARM_LDRB_I(r_A, r_scratch, 0), +- ctx); +- else if (load_order == 1) +- emit_load_be16(condt, r_A, r_scratch, ctx); +- else if (load_order == 2) +- emit_load_be32(condt, r_A, r_scratch, ctx); +- +- _emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx); +- +- /* the slowpath */ +- emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx); +- emit(ARM_MOV_R(ARM_R0, r_skb), ctx); +- /* the offset is already in R1 */ +- emit_blx_r(ARM_R3, ctx); +- /* check the result of skb_copy_bits */ +- emit(ARM_CMP_I(ARM_R1, 0), ctx); +- emit_err_ret(ARM_COND_NE, ctx); +- emit(ARM_MOV_R(r_A, ARM_R0), ctx); +- break; +- case BPF_LD | BPF_W | BPF_IND: +- load_order = 2; +- goto load_ind; +- case BPF_LD | BPF_H | BPF_IND: +- load_order = 1; +- goto load_ind; +- case BPF_LD | BPF_B | BPF_IND: +- load_order = 0; +-load_ind: +- update_on_xread(ctx); +- OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); +- goto load_common; +- case BPF_LDX | BPF_IMM: +- ctx->seen |= SEEN_X; +- emit_mov_i(r_X, k, ctx); +- break; +- case BPF_LDX | BPF_W | BPF_LEN: +- ctx->seen |= SEEN_X | SEEN_SKB; +- emit(ARM_LDR_I(r_X, r_skb, +- offsetof(struct sk_buff, len)), ctx); +- break; +- case BPF_LDX | BPF_MEM: +- ctx->seen |= SEEN_X | SEEN_MEM_WORD(k); +- emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); +- break; +- case BPF_LDX | BPF_B | BPF_MSH: +- /* x = ((*(frame + k)) & 0xf) << 2; */ +- ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; +- /* the interpreter should deal with the negative K */ +- if ((int)k < 0) +- return -1; +- /* offset in r1: we might have to take the slow path */ +- emit_mov_i(r_off, k, ctx); +- emit(ARM_CMP_R(r_skb_hl, r_off), ctx); +- +- /* load in r0: common with the slowpath */ +- _emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data, +- ARM_R1), ctx); +- /* +- * emit_mov_i() might generate one or two instructions, +- * the same holds for emit_blx_r() +- */ +- _emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx); ++/* restore callee saved registers. */ ++static void build_epilogue(struct jit_ctx *ctx) ++{ ++#ifdef CONFIG_FRAME_POINTER ++ /* When using frame pointers, some additional registers need to ++ * be loaded. */ ++ u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP; ++ emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx); ++ emit(ARM_LDM(ARM_SP, reg_set), ctx); ++#else ++ /* Restore callee saved registers. 
*/ ++ emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx); ++ emit(ARM_POP(CALLEE_POP_MASK), ctx); ++#endif ++} + +- emit(ARM_MOV_R(ARM_R0, r_skb), ctx); +- /* r_off is r1 */ +- emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx); +- emit_blx_r(ARM_R3, ctx); +- /* check the return value of skb_copy_bits */ +- emit(ARM_CMP_I(ARM_R1, 0), ctx); +- emit_err_ret(ARM_COND_NE, ctx); +- +- emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx); +- emit(ARM_LSL_I(r_X, r_X, 2), ctx); +- break; +- case BPF_ST: +- ctx->seen |= SEEN_MEM_WORD(k); +- emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); +- break; +- case BPF_STX: +- update_on_xread(ctx); +- ctx->seen |= SEEN_MEM_WORD(k); +- emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); +- break; +- case BPF_ALU | BPF_ADD | BPF_K: +- /* A += K */ +- OP_IMM3(ARM_ADD, r_A, r_A, k, ctx); +- break; +- case BPF_ALU | BPF_ADD | BPF_X: +- update_on_xread(ctx); +- emit(ARM_ADD_R(r_A, r_A, r_X), ctx); +- break; +- case BPF_ALU | BPF_SUB | BPF_K: +- /* A -= K */ +- OP_IMM3(ARM_SUB, r_A, r_A, k, ctx); +- break; +- case BPF_ALU | BPF_SUB | BPF_X: +- update_on_xread(ctx); +- emit(ARM_SUB_R(r_A, r_A, r_X), ctx); +- break; +- case BPF_ALU | BPF_MUL | BPF_K: +- /* A *= K */ +- emit_mov_i(r_scratch, k, ctx); +- emit(ARM_MUL(r_A, r_A, r_scratch), ctx); +- break; +- case BPF_ALU | BPF_MUL | BPF_X: +- update_on_xread(ctx); +- emit(ARM_MUL(r_A, r_A, r_X), ctx); +- break; +- case BPF_ALU | BPF_DIV | BPF_K: +- if (k == 1) +- break; +- emit_mov_i(r_scratch, k, ctx); +- emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV); +- break; +- case BPF_ALU | BPF_DIV | BPF_X: +- update_on_xread(ctx); +- emit(ARM_CMP_I(r_X, 0), ctx); +- emit_err_ret(ARM_COND_EQ, ctx); +- emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV); +- break; +- case BPF_ALU | BPF_MOD | BPF_K: +- if (k == 1) { +- emit_mov_i(r_A, 0, ctx); ++/* ++ * Convert an eBPF instruction to native instruction, i.e ++ * JITs an eBPF instruction. ++ * Returns : ++ * 0 - Successfully JITed an 8-byte eBPF instruction ++ * >0 - Successfully JITed a 16-byte eBPF instruction ++ * <0 - Failed to JIT. 
++ */ ++static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) ++{ ++ const u8 code = insn->code; ++ const s8 *dst = bpf2a32[insn->dst_reg]; ++ const s8 *src = bpf2a32[insn->src_reg]; ++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ const s16 off = insn->off; ++ const s32 imm = insn->imm; ++ const int i = insn - ctx->prog->insnsi; ++ const bool is64 = BPF_CLASS(code) == BPF_ALU64; ++ const s8 *rd, *rs; ++ s8 rd_lo, rt, rm, rn; ++ s32 jmp_offset; ++ ++#define check_imm(bits, imm) do { \ ++ if ((imm) >= (1 << ((bits) - 1)) || \ ++ (imm) < -(1 << ((bits) - 1))) { \ ++ pr_info("[%2d] imm=%d(0x%x) out of range\n", \ ++ i, imm, imm); \ ++ return -EINVAL; \ ++ } \ ++} while (0) ++#define check_imm24(imm) check_imm(24, imm) ++ ++ switch (code) { ++ /* ALU operations */ ++ ++ /* dst = src */ ++ case BPF_ALU | BPF_MOV | BPF_K: ++ case BPF_ALU | BPF_MOV | BPF_X: ++ case BPF_ALU64 | BPF_MOV | BPF_K: ++ case BPF_ALU64 | BPF_MOV | BPF_X: ++ switch (BPF_SRC(code)) { ++ case BPF_X: ++ if (imm == 1) { ++ /* Special mov32 for zext */ ++ emit_a32_mov_i(dst_hi, 0, ctx); + break; + } +- emit_mov_i(r_scratch, k, ctx); +- emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD); ++ emit_a32_mov_r64(is64, dst, src, ctx); + break; +- case BPF_ALU | BPF_MOD | BPF_X: +- update_on_xread(ctx); +- emit(ARM_CMP_I(r_X, 0), ctx); +- emit_err_ret(ARM_COND_EQ, ctx); +- emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD); +- break; +- case BPF_ALU | BPF_OR | BPF_K: +- /* A |= K */ +- OP_IMM3(ARM_ORR, r_A, r_A, k, ctx); +- break; +- case BPF_ALU | BPF_OR | BPF_X: +- update_on_xread(ctx); +- emit(ARM_ORR_R(r_A, r_A, r_X), ctx); +- break; +- case BPF_ALU | BPF_XOR | BPF_K: +- /* A ^= K; */ +- OP_IMM3(ARM_EOR, r_A, r_A, k, ctx); +- break; +- case BPF_ANC | SKF_AD_ALU_XOR_X: +- case BPF_ALU | BPF_XOR | BPF_X: +- /* A ^= X */ +- update_on_xread(ctx); +- emit(ARM_EOR_R(r_A, r_A, r_X), ctx); +- break; +- case BPF_ALU | BPF_AND | BPF_K: +- /* A &= K */ +- OP_IMM3(ARM_AND, r_A, r_A, k, ctx); +- break; +- case BPF_ALU | BPF_AND | BPF_X: +- update_on_xread(ctx); +- emit(ARM_AND_R(r_A, r_A, r_X), ctx); +- break; +- case BPF_ALU | BPF_LSH | BPF_K: +- if (unlikely(k > 31)) +- return -1; +- emit(ARM_LSL_I(r_A, r_A, k), ctx); +- break; +- case BPF_ALU | BPF_LSH | BPF_X: +- update_on_xread(ctx); +- emit(ARM_LSL_R(r_A, r_A, r_X), ctx); +- break; +- case BPF_ALU | BPF_RSH | BPF_K: +- if (unlikely(k > 31)) +- return -1; +- if (k) +- emit(ARM_LSR_I(r_A, r_A, k), ctx); +- break; +- case BPF_ALU | BPF_RSH | BPF_X: +- update_on_xread(ctx); +- emit(ARM_LSR_R(r_A, r_A, r_X), ctx); +- break; +- case BPF_ALU | BPF_NEG: +- /* A = -A */ +- emit(ARM_RSB_I(r_A, r_A, 0), ctx); +- break; +- case BPF_JMP | BPF_JA: +- /* pc += K */ +- emit(ARM_B(b_imm(i + k + 1, ctx)), ctx); +- break; +- case BPF_JMP | BPF_JEQ | BPF_K: +- /* pc += (A == K) ? pc->jt : pc->jf */ +- condt = ARM_COND_EQ; +- goto cmp_imm; +- case BPF_JMP | BPF_JGT | BPF_K: +- /* pc += (A > K) ? pc->jt : pc->jf */ +- condt = ARM_COND_HI; +- goto cmp_imm; +- case BPF_JMP | BPF_JGE | BPF_K: +- /* pc += (A >= K) ? 
pc->jt : pc->jf */ +- condt = ARM_COND_HS; +-cmp_imm: +- imm12 = imm8m(k); +- if (imm12 < 0) { +- emit_mov_i_no8m(r_scratch, k, ctx); +- emit(ARM_CMP_R(r_A, r_scratch), ctx); +- } else { +- emit(ARM_CMP_I(r_A, imm12), ctx); +- } +-cond_jump: +- if (inst->jt) +- _emit(condt, ARM_B(b_imm(i + inst->jt + 1, +- ctx)), ctx); +- if (inst->jf) +- _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1, +- ctx)), ctx); +- break; +- case BPF_JMP | BPF_JEQ | BPF_X: +- /* pc += (A == X) ? pc->jt : pc->jf */ +- condt = ARM_COND_EQ; +- goto cmp_x; +- case BPF_JMP | BPF_JGT | BPF_X: +- /* pc += (A > X) ? pc->jt : pc->jf */ +- condt = ARM_COND_HI; +- goto cmp_x; +- case BPF_JMP | BPF_JGE | BPF_X: +- /* pc += (A >= X) ? pc->jt : pc->jf */ +- condt = ARM_COND_CS; +-cmp_x: +- update_on_xread(ctx); +- emit(ARM_CMP_R(r_A, r_X), ctx); +- goto cond_jump; +- case BPF_JMP | BPF_JSET | BPF_K: +- /* pc += (A & K) ? pc->jt : pc->jf */ +- condt = ARM_COND_NE; +- /* not set iff all zeroes iff Z==1 iff EQ */ +- +- imm12 = imm8m(k); +- if (imm12 < 0) { +- emit_mov_i_no8m(r_scratch, k, ctx); +- emit(ARM_TST_R(r_A, r_scratch), ctx); +- } else { +- emit(ARM_TST_I(r_A, imm12), ctx); +- } +- goto cond_jump; +- case BPF_JMP | BPF_JSET | BPF_X: +- /* pc += (A & X) ? pc->jt : pc->jf */ +- update_on_xread(ctx); +- condt = ARM_COND_NE; +- emit(ARM_TST_R(r_A, r_X), ctx); +- goto cond_jump; +- case BPF_RET | BPF_A: +- emit(ARM_MOV_R(ARM_R0, r_A), ctx); +- goto b_epilogue; +- case BPF_RET | BPF_K: +- if ((k == 0) && (ctx->ret0_fp_idx < 0)) +- ctx->ret0_fp_idx = i; +- emit_mov_i(ARM_R0, k, ctx); +-b_epilogue: +- if (i != ctx->skf->len - 1) +- emit(ARM_B(b_imm(prog->len, ctx)), ctx); +- break; +- case BPF_MISC | BPF_TAX: +- /* X = A */ +- ctx->seen |= SEEN_X; +- emit(ARM_MOV_R(r_X, r_A), ctx); +- break; +- case BPF_MISC | BPF_TXA: +- /* A = X */ +- update_on_xread(ctx); +- emit(ARM_MOV_R(r_A, r_X), ctx); +- break; +- case BPF_ANC | SKF_AD_PROTOCOL: +- /* A = ntohs(skb->protocol) */ +- ctx->seen |= SEEN_SKB; +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, +- protocol) != 2); +- off = offsetof(struct sk_buff, protocol); +- emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx); +- emit_swap16(r_A, r_scratch, ctx); +- break; +- case BPF_ANC | SKF_AD_CPU: +- /* r_scratch = current_thread_info() */ +- OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx); +- /* A = current_thread_info()->cpu */ +- BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4); +- off = offsetof(struct thread_info, cpu); +- emit(ARM_LDR_I(r_A, r_scratch, off), ctx); +- break; +- case BPF_ANC | SKF_AD_IFINDEX: +- case BPF_ANC | SKF_AD_HATYPE: +- /* A = skb->dev->ifindex */ +- /* A = skb->dev->type */ +- ctx->seen |= SEEN_SKB; +- off = offsetof(struct sk_buff, dev); +- emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); +- +- emit(ARM_CMP_I(r_scratch, 0), ctx); +- emit_err_ret(ARM_COND_EQ, ctx); +- +- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, +- ifindex) != 4); +- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, +- type) != 2); +- +- if (code == (BPF_ANC | SKF_AD_IFINDEX)) { +- off = offsetof(struct net_device, ifindex); +- emit(ARM_LDR_I(r_A, r_scratch, off), ctx); +- } else { +- /* +- * offset of field "type" in "struct +- * net_device" is above what can be +- * used in the ldrh rd, [rn, #imm] +- * instruction, so load the offset in +- * a register and use ldrh rd, [rn, rm] +- */ +- off = offsetof(struct net_device, type); +- emit_mov_i(ARM_R3, off, ctx); +- emit(ARM_LDRH_R(r_A, r_scratch, ARM_R3), ctx); +- } ++ case BPF_K: ++ /* Sign-extend immediate value to destination reg */ ++ 
emit_a32_mov_se_i64(is64, dst, imm, ctx); + break; +- case BPF_ANC | SKF_AD_MARK: +- ctx->seen |= SEEN_SKB; +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); +- off = offsetof(struct sk_buff, mark); +- emit(ARM_LDR_I(r_A, r_skb, off), ctx); +- break; +- case BPF_ANC | SKF_AD_RXHASH: +- ctx->seen |= SEEN_SKB; +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); +- off = offsetof(struct sk_buff, hash); +- emit(ARM_LDR_I(r_A, r_skb, off), ctx); +- break; +- case BPF_ANC | SKF_AD_VLAN_TAG: +- case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: +- ctx->seen |= SEEN_SKB; +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); +- off = offsetof(struct sk_buff, vlan_tci); +- emit(ARM_LDRH_I(r_A, r_skb, off), ctx); +- if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) +- OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx); +- else { +- OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx); +- OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx); +- } ++ } ++ break; ++ /* dst = dst + src/imm */ ++ /* dst = dst - src/imm */ ++ /* dst = dst | src/imm */ ++ /* dst = dst & src/imm */ ++ /* dst = dst ^ src/imm */ ++ /* dst = dst * src/imm */ ++ /* dst = dst << src */ ++ /* dst = dst >> src */ ++ case BPF_ALU | BPF_ADD | BPF_K: ++ case BPF_ALU | BPF_ADD | BPF_X: ++ case BPF_ALU | BPF_SUB | BPF_K: ++ case BPF_ALU | BPF_SUB | BPF_X: ++ case BPF_ALU | BPF_OR | BPF_K: ++ case BPF_ALU | BPF_OR | BPF_X: ++ case BPF_ALU | BPF_AND | BPF_K: ++ case BPF_ALU | BPF_AND | BPF_X: ++ case BPF_ALU | BPF_XOR | BPF_K: ++ case BPF_ALU | BPF_XOR | BPF_X: ++ case BPF_ALU | BPF_MUL | BPF_K: ++ case BPF_ALU | BPF_MUL | BPF_X: ++ case BPF_ALU | BPF_LSH | BPF_X: ++ case BPF_ALU | BPF_RSH | BPF_X: ++ case BPF_ALU | BPF_ARSH | BPF_K: ++ case BPF_ALU | BPF_ARSH | BPF_X: ++ case BPF_ALU64 | BPF_ADD | BPF_K: ++ case BPF_ALU64 | BPF_ADD | BPF_X: ++ case BPF_ALU64 | BPF_SUB | BPF_K: ++ case BPF_ALU64 | BPF_SUB | BPF_X: ++ case BPF_ALU64 | BPF_OR | BPF_K: ++ case BPF_ALU64 | BPF_OR | BPF_X: ++ case BPF_ALU64 | BPF_AND | BPF_K: ++ case BPF_ALU64 | BPF_AND | BPF_X: ++ case BPF_ALU64 | BPF_XOR | BPF_K: ++ case BPF_ALU64 | BPF_XOR | BPF_X: ++ switch (BPF_SRC(code)) { ++ case BPF_X: ++ emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code)); ++ break; ++ case BPF_K: ++ /* Move immediate value to the temporary register ++ * and then do the ALU operation on the temporary ++ * register as this will sign-extend the immediate ++ * value into temporary reg and then it would be ++ * safe to do the operation on it. 
++ */ ++ emit_a32_mov_se_i64(is64, tmp2, imm, ctx); ++ emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code)); + break; +- case BPF_ANC | SKF_AD_PKTTYPE: +- ctx->seen |= SEEN_SKB; +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, +- __pkt_type_offset[0]) != 1); +- off = PKT_TYPE_OFFSET(); +- emit(ARM_LDRB_I(r_A, r_skb, off), ctx); +- emit(ARM_AND_I(r_A, r_A, PKT_TYPE_MAX), ctx); +-#ifdef __BIG_ENDIAN_BITFIELD +- emit(ARM_LSR_I(r_A, r_A, 5), ctx); +-#endif ++ } ++ break; ++ /* dst = dst / src(imm) */ ++ /* dst = dst % src(imm) */ ++ case BPF_ALU | BPF_DIV | BPF_K: ++ case BPF_ALU | BPF_DIV | BPF_X: ++ case BPF_ALU | BPF_MOD | BPF_K: ++ case BPF_ALU | BPF_MOD | BPF_X: ++ rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); ++ switch (BPF_SRC(code)) { ++ case BPF_X: ++ rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx); ++ break; ++ case BPF_K: ++ rt = tmp2[0]; ++ emit_a32_mov_i(rt, imm, ctx); + break; +- case BPF_ANC | SKF_AD_QUEUE: +- ctx->seen |= SEEN_SKB; +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, +- queue_mapping) != 2); +- BUILD_BUG_ON(offsetof(struct sk_buff, +- queue_mapping) > 0xff); +- off = offsetof(struct sk_buff, queue_mapping); +- emit(ARM_LDRH_I(r_A, r_skb, off), ctx); +- break; +- case BPF_ANC | SKF_AD_PAY_OFFSET: +- ctx->seen |= SEEN_SKB | SEEN_CALL; +- +- emit(ARM_MOV_R(ARM_R0, r_skb), ctx); +- emit_mov_i(ARM_R3, (unsigned int)skb_get_poff, ctx); +- emit_blx_r(ARM_R3, ctx); +- emit(ARM_MOV_R(r_A, ARM_R0), ctx); +- break; +- case BPF_LDX | BPF_W | BPF_ABS: +- /* +- * load a 32bit word from struct seccomp_data. +- * seccomp_check_filter() will already have checked +- * that k is 32bit aligned and lies within the +- * struct seccomp_data. ++ default: ++ rt = src_lo; ++ break; ++ } ++ emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); ++ arm_bpf_put_reg32(dst_lo, rd_lo, ctx); ++ if (!ctx->prog->aux->verifier_zext) ++ emit_a32_mov_i(dst_hi, 0, ctx); ++ break; ++ case BPF_ALU64 | BPF_DIV | BPF_K: ++ case BPF_ALU64 | BPF_DIV | BPF_X: ++ case BPF_ALU64 | BPF_MOD | BPF_K: ++ case BPF_ALU64 | BPF_MOD | BPF_X: ++ goto notyet; ++ /* dst = dst >> imm */ ++ /* dst = dst << imm */ ++ case BPF_ALU | BPF_RSH | BPF_K: ++ case BPF_ALU | BPF_LSH | BPF_K: ++ if (unlikely(imm > 31)) ++ return -EINVAL; ++ if (imm) ++ emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); ++ if (!ctx->prog->aux->verifier_zext) ++ emit_a32_mov_i(dst_hi, 0, ctx); ++ break; ++ /* dst = dst << imm */ ++ case BPF_ALU64 | BPF_LSH | BPF_K: ++ if (unlikely(imm > 63)) ++ return -EINVAL; ++ emit_a32_lsh_i64(dst, imm, ctx); ++ break; ++ /* dst = dst >> imm */ ++ case BPF_ALU64 | BPF_RSH | BPF_K: ++ if (unlikely(imm > 63)) ++ return -EINVAL; ++ emit_a32_rsh_i64(dst, imm, ctx); ++ break; ++ /* dst = dst << src */ ++ case BPF_ALU64 | BPF_LSH | BPF_X: ++ emit_a32_lsh_r64(dst, src, ctx); ++ break; ++ /* dst = dst >> src */ ++ case BPF_ALU64 | BPF_RSH | BPF_X: ++ emit_a32_rsh_r64(dst, src, ctx); ++ break; ++ /* dst = dst >> src (signed) */ ++ case BPF_ALU64 | BPF_ARSH | BPF_X: ++ emit_a32_arsh_r64(dst, src, ctx); ++ break; ++ /* dst = dst >> imm (signed) */ ++ case BPF_ALU64 | BPF_ARSH | BPF_K: ++ if (unlikely(imm > 63)) ++ return -EINVAL; ++ emit_a32_arsh_i64(dst, imm, ctx); ++ break; ++ /* dst = ~dst */ ++ case BPF_ALU | BPF_NEG: ++ emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); ++ if (!ctx->prog->aux->verifier_zext) ++ emit_a32_mov_i(dst_hi, 0, ctx); ++ break; ++ /* dst = ~dst (64 bit) */ ++ case BPF_ALU64 | BPF_NEG: ++ emit_a32_neg64(dst, ctx); ++ break; ++ /* dst = dst * src/imm */ ++ case BPF_ALU64 | BPF_MUL | BPF_X: ++ case BPF_ALU64 | BPF_MUL | 
BPF_K: ++ switch (BPF_SRC(code)) { ++ case BPF_X: ++ emit_a32_mul_r64(dst, src, ctx); ++ break; ++ case BPF_K: ++ /* Move immediate value to the temporary register ++ * and then do the multiplication on it as this ++ * will sign-extend the immediate value into temp ++ * reg then it would be safe to do the operation ++ * on it. + */ +- ctx->seen |= SEEN_SKB; +- emit(ARM_LDR_I(r_A, r_skb, k), ctx); ++ emit_a32_mov_se_i64(is64, tmp2, imm, ctx); ++ emit_a32_mul_r64(dst, tmp2, ctx); ++ break; ++ } ++ break; ++ /* dst = htole(dst) */ ++ /* dst = htobe(dst) */ ++ case BPF_ALU | BPF_END | BPF_FROM_LE: ++ case BPF_ALU | BPF_END | BPF_FROM_BE: ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ if (BPF_SRC(code) == BPF_FROM_LE) ++ goto emit_bswap_uxt; ++ switch (imm) { ++ case 16: ++ emit_rev16(rd[1], rd[1], ctx); ++ goto emit_bswap_uxt; ++ case 32: ++ emit_rev32(rd[1], rd[1], ctx); ++ goto emit_bswap_uxt; ++ case 64: ++ emit_rev32(ARM_LR, rd[1], ctx); ++ emit_rev32(rd[1], rd[0], ctx); ++ emit(ARM_MOV_R(rd[0], ARM_LR), ctx); + break; +- default: +- return -1; + } ++ goto exit; ++emit_bswap_uxt: ++ switch (imm) { ++ case 16: ++ /* zero-extend 16 bits into 64 bits */ ++#if __LINUX_ARM_ARCH__ < 6 ++ emit_a32_mov_i(tmp2[1], 0xffff, ctx); ++ emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx); ++#else /* ARMv6+ */ ++ emit(ARM_UXTH(rd[1], rd[1]), ctx); ++#endif ++ if (!ctx->prog->aux->verifier_zext) ++ emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); ++ break; ++ case 32: ++ /* zero-extend 32 bits into 64 bits */ ++ if (!ctx->prog->aux->verifier_zext) ++ emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); ++ break; ++ case 64: ++ /* nop */ ++ break; ++ } ++exit: ++ arm_bpf_put_reg64(dst, rd, ctx); ++ break; ++ /* dst = imm64 */ ++ case BPF_LD | BPF_IMM | BPF_DW: ++ { ++ u64 val = (u32)imm | (u64)insn[1].imm << 32; + +- if (ctx->flags & FLAG_IMM_OVERFLOW) +- /* +- * this instruction generated an overflow when +- * trying to access the literal pool, so +- * delegate this filter to the kernel interpreter. 
+- */ +- return -1; ++ emit_a32_mov_i64(dst, val, ctx); ++ ++ return 1; + } ++ /* LDX: dst = *(size *)(src + off) */ ++ case BPF_LDX | BPF_MEM | BPF_W: ++ case BPF_LDX | BPF_MEM | BPF_H: ++ case BPF_LDX | BPF_MEM | BPF_B: ++ case BPF_LDX | BPF_MEM | BPF_DW: ++ rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); ++ emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); ++ break; ++ /* ST: *(size *)(dst + off) = imm */ ++ case BPF_ST | BPF_MEM | BPF_W: ++ case BPF_ST | BPF_MEM | BPF_H: ++ case BPF_ST | BPF_MEM | BPF_B: ++ case BPF_ST | BPF_MEM | BPF_DW: ++ switch (BPF_SIZE(code)) { ++ case BPF_DW: ++ /* Sign-extend immediate value into temp reg */ ++ emit_a32_mov_se_i64(true, tmp2, imm, ctx); ++ break; ++ case BPF_W: ++ case BPF_H: ++ case BPF_B: ++ emit_a32_mov_i(tmp2[1], imm, ctx); ++ break; ++ } ++ emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code)); ++ break; ++ /* STX XADD: lock *(u32 *)(dst + off) += src */ ++ case BPF_STX | BPF_XADD | BPF_W: ++ /* STX XADD: lock *(u64 *)(dst + off) += src */ ++ case BPF_STX | BPF_XADD | BPF_DW: ++ goto notyet; ++ /* STX: *(size *)(dst + off) = src */ ++ case BPF_STX | BPF_MEM | BPF_W: ++ case BPF_STX | BPF_MEM | BPF_H: ++ case BPF_STX | BPF_MEM | BPF_B: ++ case BPF_STX | BPF_MEM | BPF_DW: ++ rs = arm_bpf_get_reg64(src, tmp2, ctx); ++ emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code)); ++ break; ++ /* PC += off if dst == src */ ++ /* PC += off if dst > src */ ++ /* PC += off if dst >= src */ ++ /* PC += off if dst < src */ ++ /* PC += off if dst <= src */ ++ /* PC += off if dst != src */ ++ /* PC += off if dst > src (signed) */ ++ /* PC += off if dst >= src (signed) */ ++ /* PC += off if dst < src (signed) */ ++ /* PC += off if dst <= src (signed) */ ++ /* PC += off if dst & src */ ++ case BPF_JMP | BPF_JEQ | BPF_X: ++ case BPF_JMP | BPF_JGT | BPF_X: ++ case BPF_JMP | BPF_JGE | BPF_X: ++ case BPF_JMP | BPF_JNE | BPF_X: ++ case BPF_JMP | BPF_JSGT | BPF_X: ++ case BPF_JMP | BPF_JSGE | BPF_X: ++ case BPF_JMP | BPF_JSET | BPF_X: ++ case BPF_JMP | BPF_JLE | BPF_X: ++ case BPF_JMP | BPF_JLT | BPF_X: ++ case BPF_JMP | BPF_JSLT | BPF_X: ++ case BPF_JMP | BPF_JSLE | BPF_X: ++ case BPF_JMP32 | BPF_JEQ | BPF_X: ++ case BPF_JMP32 | BPF_JGT | BPF_X: ++ case BPF_JMP32 | BPF_JGE | BPF_X: ++ case BPF_JMP32 | BPF_JNE | BPF_X: ++ case BPF_JMP32 | BPF_JSGT | BPF_X: ++ case BPF_JMP32 | BPF_JSGE | BPF_X: ++ case BPF_JMP32 | BPF_JSET | BPF_X: ++ case BPF_JMP32 | BPF_JLE | BPF_X: ++ case BPF_JMP32 | BPF_JLT | BPF_X: ++ case BPF_JMP32 | BPF_JSLT | BPF_X: ++ case BPF_JMP32 | BPF_JSLE | BPF_X: ++ /* Setup source registers */ ++ rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); ++ rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); ++ goto go_jmp; ++ /* PC += off if dst == imm */ ++ /* PC += off if dst > imm */ ++ /* PC += off if dst >= imm */ ++ /* PC += off if dst < imm */ ++ /* PC += off if dst <= imm */ ++ /* PC += off if dst != imm */ ++ /* PC += off if dst > imm (signed) */ ++ /* PC += off if dst >= imm (signed) */ ++ /* PC += off if dst < imm (signed) */ ++ /* PC += off if dst <= imm (signed) */ ++ /* PC += off if dst & imm */ ++ case BPF_JMP | BPF_JEQ | BPF_K: ++ case BPF_JMP | BPF_JGT | BPF_K: ++ case BPF_JMP | BPF_JGE | BPF_K: ++ case BPF_JMP | BPF_JNE | BPF_K: ++ case BPF_JMP | BPF_JSGT | BPF_K: ++ case BPF_JMP | BPF_JSGE | BPF_K: ++ case BPF_JMP | BPF_JSET | BPF_K: ++ case BPF_JMP | BPF_JLT | BPF_K: ++ case BPF_JMP | BPF_JLE | BPF_K: ++ case BPF_JMP | BPF_JSLT | BPF_K: ++ case BPF_JMP | BPF_JSLE | BPF_K: ++ case BPF_JMP32 | BPF_JEQ | BPF_K: ++ case BPF_JMP32 | BPF_JGT | BPF_K: ++ case 
BPF_JMP32 | BPF_JGE | BPF_K: ++ case BPF_JMP32 | BPF_JNE | BPF_K: ++ case BPF_JMP32 | BPF_JSGT | BPF_K: ++ case BPF_JMP32 | BPF_JSGE | BPF_K: ++ case BPF_JMP32 | BPF_JSET | BPF_K: ++ case BPF_JMP32 | BPF_JLT | BPF_K: ++ case BPF_JMP32 | BPF_JLE | BPF_K: ++ case BPF_JMP32 | BPF_JSLT | BPF_K: ++ case BPF_JMP32 | BPF_JSLE | BPF_K: ++ if (off == 0) ++ break; ++ rm = tmp2[0]; ++ rn = tmp2[1]; ++ /* Sign-extend immediate value */ ++ emit_a32_mov_se_i64(true, tmp2, imm, ctx); ++go_jmp: ++ /* Setup destination register */ ++ rd = arm_bpf_get_reg64(dst, tmp, ctx); ++ ++ /* Check for the condition */ ++ emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code), ++ BPF_CLASS(code) == BPF_JMP); ++ ++ /* Setup JUMP instruction */ ++ jmp_offset = bpf2a32_offset(i+off, i, ctx); ++ switch (BPF_OP(code)) { ++ case BPF_JNE: ++ case BPF_JSET: ++ _emit(ARM_COND_NE, ARM_B(jmp_offset), ctx); ++ break; ++ case BPF_JEQ: ++ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); ++ break; ++ case BPF_JGT: ++ _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); ++ break; ++ case BPF_JGE: ++ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); ++ break; ++ case BPF_JSGT: ++ _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); ++ break; ++ case BPF_JSGE: ++ _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); ++ break; ++ case BPF_JLE: ++ _emit(ARM_COND_LS, ARM_B(jmp_offset), ctx); ++ break; ++ case BPF_JLT: ++ _emit(ARM_COND_CC, ARM_B(jmp_offset), ctx); ++ break; ++ case BPF_JSLT: ++ _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); ++ break; ++ case BPF_JSLE: ++ _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); ++ break; ++ } ++ break; ++ /* JMP OFF */ ++ case BPF_JMP | BPF_JA: ++ { ++ if (off == 0) ++ break; ++ jmp_offset = bpf2a32_offset(i+off, i, ctx); ++ check_imm24(jmp_offset); ++ emit(ARM_B(jmp_offset), ctx); ++ break; ++ } ++ /* tail call */ ++ case BPF_JMP | BPF_TAIL_CALL: ++ if (emit_bpf_tail_call(ctx)) ++ return -EFAULT; ++ break; ++ /* function call */ ++ case BPF_JMP | BPF_CALL: ++ { ++ const s8 *r0 = bpf2a32[BPF_REG_0]; ++ const s8 *r1 = bpf2a32[BPF_REG_1]; ++ const s8 *r2 = bpf2a32[BPF_REG_2]; ++ const s8 *r3 = bpf2a32[BPF_REG_3]; ++ const s8 *r4 = bpf2a32[BPF_REG_4]; ++ const s8 *r5 = bpf2a32[BPF_REG_5]; ++ const u32 func = (u32)__bpf_call_base + (u32)imm; ++ ++ emit_a32_mov_r64(true, r0, r1, ctx); ++ emit_a32_mov_r64(true, r1, r2, ctx); ++ emit_push_r64(r5, ctx); ++ emit_push_r64(r4, ctx); ++ emit_push_r64(r3, ctx); + +- /* compute offsets only during the first pass */ +- if (ctx->target == NULL) +- ctx->offsets[i] = ctx->idx * 4; ++ emit_a32_mov_i(tmp[1], func, ctx); ++ emit_blx_r(tmp[1], ctx); + ++ emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean ++ break; ++ } ++ /* function return */ ++ case BPF_JMP | BPF_EXIT: ++ /* Optimization: when last instruction is EXIT ++ * simply fallthrough to epilogue. ++ */ ++ if (i == ctx->prog->len - 1) ++ break; ++ jmp_offset = epilogue_offset(ctx); ++ check_imm24(jmp_offset); ++ emit(ARM_B(jmp_offset), ctx); ++ break; ++notyet: ++ pr_info_once("*** NOT YET: opcode %02x ***\n", code); ++ return -EFAULT; ++ default: ++ pr_err_once("unknown opcode %02x\n", code); ++ return -EINVAL; ++ } ++ ++ if (ctx->flags & FLAG_IMM_OVERFLOW) ++ /* ++ * this instruction generated an overflow when ++ * trying to access the literal pool, so ++ * delegate this filter to the kernel interpreter. 
++ */ ++ return -1; + return 0; + } + ++static int build_body(struct jit_ctx *ctx) ++{ ++ const struct bpf_prog *prog = ctx->prog; ++ unsigned int i; ++ ++ for (i = 0; i < prog->len; i++) { ++ const struct bpf_insn *insn = &(prog->insnsi[i]); ++ int ret; ++ ++ ret = build_insn(insn, ctx); ++ ++ /* It's used with loading the 64 bit immediate value. */ ++ if (ret > 0) { ++ i++; ++ if (ctx->target == NULL) ++ ctx->offsets[i] = ctx->idx; ++ continue; ++ } ++ ++ if (ctx->target == NULL) ++ ctx->offsets[i] = ctx->idx; ++ ++ /* If unsuccesfull, return with error code */ ++ if (ret) ++ return ret; ++ } ++ return 0; ++} ++ ++static int validate_code(struct jit_ctx *ctx) ++{ ++ int i; ++ ++ for (i = 0; i < ctx->idx; i++) { ++ if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF)) ++ return -1; ++ } ++ ++ return 0; ++} + +-void bpf_jit_compile(struct bpf_prog *fp) ++void bpf_jit_compile(struct bpf_prog *prog) + { ++ /* Nothing to do here. We support Internal BPF. */ ++} ++ ++bool bpf_jit_needs_zext(void) ++{ ++ return true; ++} ++ ++struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ++{ ++ struct bpf_prog *tmp, *orig_prog = prog; + struct bpf_binary_header *header; ++ bool tmp_blinded = false; + struct jit_ctx ctx; +- unsigned tmp_idx; +- unsigned alloc_size; +- u8 *target_ptr; ++ unsigned int tmp_idx; ++ unsigned int image_size; ++ u8 *image_ptr; + +- if (!bpf_jit_enable) +- return; ++ /* If BPF JIT was not enabled then we must fall back to ++ * the interpreter. ++ */ ++ if (!prog->jit_requested) ++ return orig_prog; + +- memset(&ctx, 0, sizeof(ctx)); +- ctx.skf = fp; +- ctx.ret0_fp_idx = -1; ++ /* If constant blinding was enabled and we failed during blinding ++ * then we must fall back to the interpreter. Otherwise, we save ++ * the new JITed code. ++ */ ++ tmp = bpf_jit_blind_constants(prog); + +- ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL); +- if (ctx.offsets == NULL) +- return; ++ if (IS_ERR(tmp)) ++ return orig_prog; ++ if (tmp != prog) { ++ tmp_blinded = true; ++ prog = tmp; ++ } + +- /* fake pass to fill in the ctx->seen */ +- if (unlikely(build_body(&ctx))) ++ memset(&ctx, 0, sizeof(ctx)); ++ ctx.prog = prog; ++ ctx.cpu_architecture = cpu_architecture(); ++ ++ /* Not able to allocate memory for offsets[] , then ++ * we must fall back to the interpreter ++ */ ++ ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL); ++ if (ctx.offsets == NULL) { ++ prog = orig_prog; + goto out; ++ } ++ ++ /* 1) fake pass to find in the length of the JITed code, ++ * to compute ctx->offsets and other context variables ++ * needed to compute final JITed code. ++ * Also, calculate random starting pointer/start of JITed code ++ * which is prefixed by random number of fault instructions. ++ * ++ * If the first pass fails then there is no chance of it ++ * being successful in the second pass, so just fall back ++ * to the interpreter. 
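
The pattern build_body() relies on here is worth spelling out for reviewers: with ctx->target == NULL the same code runs as a sizing pass that only fills ctx->offsets[], and an instruction whose handler returns 1 (BPF_LD | BPF_IMM | BPF_DW, which consumes insn[1] as the upper 32 bits of its immediate) still records an offsets[] entry for the consumed slot. The following is only a minimal userspace sketch of that two-pass layout; every toy_* name is invented for illustration and nothing here is kernel API.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

enum { TOY_NOP, TOY_IMM64 };                  /* hypothetical opcodes */
struct toy_insn { int op; uint32_t imm; };
struct toy_ctx { uint32_t *target; int idx; int *offsets; };

static void toy_emit(struct toy_ctx *c, uint32_t word)
{
	if (c->target)                        /* second pass: really write */
		c->target[c->idx] = word;
	c->idx++;                             /* both passes: just count */
}

/* Returns 1 when the instruction also consumed insn[1], like LD_IMM_DW. */
static int toy_build_insn(const struct toy_insn *insn, struct toy_ctx *c)
{
	toy_emit(c, insn[0].imm);
	if (insn[0].op == TOY_IMM64) {
		toy_emit(c, insn[1].imm);     /* upper half of the 64-bit imm */
		return 1;
	}
	return 0;
}

static void toy_build_body(const struct toy_insn *prog, int len, struct toy_ctx *c)
{
	for (int i = 0; i < len; i++) {
		int ret = toy_build_insn(&prog[i], c);

		if (ret > 0) {                /* skip the slot we already consumed */
			i++;
			if (!c->target)
				c->offsets[i] = c->idx;
			continue;
		}
		if (!c->target)
			c->offsets[i] = c->idx;
	}
}

int main(void)
{
	struct toy_insn prog[] = {
		{ TOY_NOP, 0x11 }, { TOY_IMM64, 0x22 }, { TOY_NOP, 0x33 }, { TOY_NOP, 0x44 },
	};
	int len = 4, offsets[4] = { 0 };
	struct toy_ctx c = { .target = NULL, .offsets = offsets };

	toy_build_body(prog, len, &c);        /* pass 1: sizes and offsets only */
	c.target = calloc(c.idx, sizeof(*c.target));
	c.idx = 0;
	toy_build_body(prog, len, &c);        /* pass 2: emit into the image */

	for (int i = 0; i < len; i++)
		printf("insn %d ends at word %d\n", i, offsets[i]);
	free(c.target);
	return 0;
}
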
++ */ ++ if (build_body(&ctx)) { ++ prog = orig_prog; ++ goto out_off; ++ } + + tmp_idx = ctx.idx; + build_prologue(&ctx); + ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; + ++ ctx.epilogue_offset = ctx.idx; ++ + #if __LINUX_ARM_ARCH__ < 7 + tmp_idx = ctx.idx; + build_epilogue(&ctx); +@@ -1020,64 +1941,83 @@ void bpf_jit_compile(struct bpf_prog *fp + + ctx.idx += ctx.imm_count; + if (ctx.imm_count) { +- ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL); +- if (ctx.imms == NULL) +- goto out; ++ ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL); ++ if (ctx.imms == NULL) { ++ prog = orig_prog; ++ goto out_off; ++ } + } + #else +- /* there's nothing after the epilogue on ARMv7 */ ++ /* there's nothing about the epilogue on ARMv7 */ + build_epilogue(&ctx); + #endif +- alloc_size = 4 * ctx.idx; +- header = bpf_jit_binary_alloc(alloc_size, &target_ptr, +- 4, jit_fill_hole); +- if (header == NULL) +- goto out; ++ /* Now we can get the actual image size of the JITed arm code. ++ * Currently, we are not considering the THUMB-2 instructions ++ * for jit, although it can decrease the size of the image. ++ * ++ * As each arm instruction is of length 32bit, we are translating ++ * number of JITed intructions into the size required to store these ++ * JITed code. ++ */ ++ image_size = sizeof(u32) * ctx.idx; + +- ctx.target = (u32 *) target_ptr; ++ /* Now we know the size of the structure to make */ ++ header = bpf_jit_binary_alloc(image_size, &image_ptr, ++ sizeof(u32), jit_fill_hole); ++ /* Not able to allocate memory for the structure then ++ * we must fall back to the interpretation ++ */ ++ if (header == NULL) { ++ prog = orig_prog; ++ goto out_imms; ++ } ++ ++ /* 2.) Actual pass to generate final JIT code */ ++ ctx.target = (u32 *) image_ptr; + ctx.idx = 0; + + build_prologue(&ctx); ++ ++ /* If building the body of the JITed code fails somehow, ++ * we fall back to the interpretation. ++ */ + if (build_body(&ctx) < 0) { +-#if __LINUX_ARM_ARCH__ < 7 +- if (ctx.imm_count) +- kfree(ctx.imms); +-#endif ++ image_ptr = NULL; + bpf_jit_binary_free(header); +- goto out; ++ prog = orig_prog; ++ goto out_imms; + } + build_epilogue(&ctx); + ++ /* 3.) Extra pass to validate JITed Code */ ++ if (validate_code(&ctx)) { ++ image_ptr = NULL; ++ bpf_jit_binary_free(header); ++ prog = orig_prog; ++ goto out_imms; ++ } + flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx)); + ++ if (bpf_jit_enable > 1) ++ /* there are 2 passes here */ ++ bpf_jit_dump(prog->len, image_size, 2, ctx.target); ++ ++ bpf_jit_binary_lock_ro(header); ++ prog->bpf_func = (void *)ctx.target; ++ prog->jited = 1; ++ prog->jited_len = image_size; ++ ++out_imms: + #if __LINUX_ARM_ARCH__ < 7 + if (ctx.imm_count) + kfree(ctx.imms); + #endif +- +- if (bpf_jit_enable > 1) +- /* there are 2 passes here */ +- bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); +- +- set_memory_ro((unsigned long)header, header->pages); +- fp->bpf_func = (void *)ctx.target; +- fp->jited = 1; +-out: ++out_off: + kfree(ctx.offsets); +- return; ++out: ++ if (tmp_blinded) ++ bpf_jit_prog_release_other(prog, prog == orig_prog ? 
++ tmp : orig_prog); ++ return prog; + } + +-void bpf_jit_free(struct bpf_prog *fp) +-{ +- unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; +- struct bpf_binary_header *header = (void *)addr; +- +- if (!fp->jited) +- goto free_filter; +- +- set_memory_rw(addr, header->pages); +- bpf_jit_binary_free(header); +- +-free_filter: +- bpf_prog_unlock_free(fp); +-} +--- a/arch/arm/net/bpf_jit_32.h ++++ b/arch/arm/net/bpf_jit_32.h +@@ -1,16 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ + /* + * Just-In-Time compiler for BPF filters on 32bit ARM + * + * Copyright (c) 2011 Mircea Gherzan +- * +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License as published by the +- * Free Software Foundation; version 2 of the License. + */ + + #ifndef PFILTER_OPCODES_ARM_H + #define PFILTER_OPCODES_ARM_H + ++/* ARM 32bit Registers */ + #define ARM_R0 0 + #define ARM_R1 1 + #define ARM_R2 2 +@@ -22,40 +20,46 @@ + #define ARM_R8 8 + #define ARM_R9 9 + #define ARM_R10 10 +-#define ARM_FP 11 +-#define ARM_IP 12 +-#define ARM_SP 13 +-#define ARM_LR 14 +-#define ARM_PC 15 +- +-#define ARM_COND_EQ 0x0 +-#define ARM_COND_NE 0x1 +-#define ARM_COND_CS 0x2 ++#define ARM_FP 11 /* Frame Pointer */ ++#define ARM_IP 12 /* Intra-procedure scratch register */ ++#define ARM_SP 13 /* Stack pointer: as load/store base reg */ ++#define ARM_LR 14 /* Link Register */ ++#define ARM_PC 15 /* Program counter */ ++ ++#define ARM_COND_EQ 0x0 /* == */ ++#define ARM_COND_NE 0x1 /* != */ ++#define ARM_COND_CS 0x2 /* unsigned >= */ + #define ARM_COND_HS ARM_COND_CS +-#define ARM_COND_CC 0x3 ++#define ARM_COND_CC 0x3 /* unsigned < */ + #define ARM_COND_LO ARM_COND_CC +-#define ARM_COND_MI 0x4 +-#define ARM_COND_PL 0x5 +-#define ARM_COND_VS 0x6 +-#define ARM_COND_VC 0x7 +-#define ARM_COND_HI 0x8 +-#define ARM_COND_LS 0x9 +-#define ARM_COND_GE 0xa +-#define ARM_COND_LT 0xb +-#define ARM_COND_GT 0xc +-#define ARM_COND_LE 0xd +-#define ARM_COND_AL 0xe ++#define ARM_COND_MI 0x4 /* < 0 */ ++#define ARM_COND_PL 0x5 /* >= 0 */ ++#define ARM_COND_VS 0x6 /* Signed Overflow */ ++#define ARM_COND_VC 0x7 /* No Signed Overflow */ ++#define ARM_COND_HI 0x8 /* unsigned > */ ++#define ARM_COND_LS 0x9 /* unsigned <= */ ++#define ARM_COND_GE 0xa /* Signed >= */ ++#define ARM_COND_LT 0xb /* Signed < */ ++#define ARM_COND_GT 0xc /* Signed > */ ++#define ARM_COND_LE 0xd /* Signed <= */ ++#define ARM_COND_AL 0xe /* None */ + + /* register shift types */ + #define SRTYPE_LSL 0 + #define SRTYPE_LSR 1 + #define SRTYPE_ASR 2 + #define SRTYPE_ROR 3 ++#define SRTYPE_ASL (SRTYPE_LSL) + + #define ARM_INST_ADD_R 0x00800000 ++#define ARM_INST_ADDS_R 0x00900000 ++#define ARM_INST_ADC_R 0x00a00000 ++#define ARM_INST_ADC_I 0x02a00000 + #define ARM_INST_ADD_I 0x02800000 ++#define ARM_INST_ADDS_I 0x02900000 + + #define ARM_INST_AND_R 0x00000000 ++#define ARM_INST_ANDS_R 0x00100000 + #define ARM_INST_AND_I 0x02000000 + + #define ARM_INST_BIC_R 0x01c00000 +@@ -71,13 +75,18 @@ + #define ARM_INST_EOR_R 0x00200000 + #define ARM_INST_EOR_I 0x02200000 + +-#define ARM_INST_LDRB_I 0x05d00000 ++#define ARM_INST_LDST__U 0x00800000 ++#define ARM_INST_LDST__IMM12 0x00000fff ++#define ARM_INST_LDRB_I 0x05500000 + #define ARM_INST_LDRB_R 0x07d00000 +-#define ARM_INST_LDRH_I 0x01d000b0 ++#define ARM_INST_LDRD_I 0x014000d0 ++#define ARM_INST_LDRH_I 0x015000b0 + #define ARM_INST_LDRH_R 0x019000b0 +-#define ARM_INST_LDR_I 0x05900000 ++#define ARM_INST_LDR_I 0x05100000 ++#define ARM_INST_LDR_R 0x07900000 
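
A small point that is easy to miss in the bpf_jit_32.h rework above: the immediate load/store bases (ARM_INST_LDR_I, ARM_INST_LDRB_I, ARM_INST_LDRH_I, ...) are redefined with bit 23 clear, and that bit is exposed separately as ARM_INST_LDST__U, so an encoder can express a negative offset by leaving U clear and putting only the magnitude into the imm12 field. Below is a standalone sketch of that composition; ldr_imm() is an invented userspace helper, and the AL condition that the JIT's emit path normally fills in is OR-ed in by hand.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define COND_AL          0xeU          /* "always" condition, bits 31:28 */
#define INST_LDR_I       0x05100000U   /* LDR (immediate) base with U clear, as above */
#define INST_LDST__U     0x00800000U   /* bit 23: add (set) vs subtract (clear) the offset */
#define INST_LDST__IMM12 0x00000fffU

/* Compose "LDR rt, [rn, #off]": the sign goes into the U bit, the magnitude into imm12. */
static uint32_t ldr_imm(unsigned int rt, unsigned int rn, int off)
{
	uint32_t inst = INST_LDR_I | (rt << 12) | (rn << 16);

	if (off < 0)
		off = -off;                    /* keep U clear for a negative offset */
	else
		inst |= INST_LDST__U;
	inst |= (uint32_t)off & INST_LDST__IMM12;

	return (COND_AL << 28) | inst;
}

int main(void)
{
	printf("LDR r0, [r1, #4]  -> 0x%08" PRIx32 "\n", ldr_imm(0, 1, 4));   /* 0xe5910004 */
	printf("LDR r0, [r1, #-4] -> 0x%08" PRIx32 "\n", ldr_imm(0, 1, -4));  /* 0xe5110004 */
	return 0;
}
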
+ + #define ARM_INST_LDM 0x08900000 ++#define ARM_INST_LDM_IA 0x08b00000 + + #define ARM_INST_LSL_I 0x01a00000 + #define ARM_INST_LSL_R 0x01a00010 +@@ -86,6 +95,7 @@ + #define ARM_INST_LSR_R 0x01a00030 + + #define ARM_INST_MOV_R 0x01a00000 ++#define ARM_INST_MOVS_R 0x01b00000 + #define ARM_INST_MOV_I 0x03a00000 + #define ARM_INST_MOVW 0x03000000 + #define ARM_INST_MOVT 0x03400000 +@@ -96,17 +106,29 @@ + #define ARM_INST_PUSH 0x092d0000 + + #define ARM_INST_ORR_R 0x01800000 ++#define ARM_INST_ORRS_R 0x01900000 + #define ARM_INST_ORR_I 0x03800000 + + #define ARM_INST_REV 0x06bf0f30 + #define ARM_INST_REV16 0x06bf0fb0 + + #define ARM_INST_RSB_I 0x02600000 ++#define ARM_INST_RSBS_I 0x02700000 ++#define ARM_INST_RSC_I 0x02e00000 + + #define ARM_INST_SUB_R 0x00400000 ++#define ARM_INST_SUBS_R 0x00500000 ++#define ARM_INST_RSB_R 0x00600000 + #define ARM_INST_SUB_I 0x02400000 +- +-#define ARM_INST_STR_I 0x05800000 ++#define ARM_INST_SUBS_I 0x02500000 ++#define ARM_INST_SBC_I 0x02c00000 ++#define ARM_INST_SBC_R 0x00c00000 ++#define ARM_INST_SBCS_R 0x00d00000 ++ ++#define ARM_INST_STR_I 0x05000000 ++#define ARM_INST_STRB_I 0x05400000 ++#define ARM_INST_STRD_I 0x014000f0 ++#define ARM_INST_STRH_I 0x014000b0 + + #define ARM_INST_TST_R 0x01100000 + #define ARM_INST_TST_I 0x03100000 +@@ -117,6 +139,8 @@ + + #define ARM_INST_MLS 0x00600090 + ++#define ARM_INST_UXTH 0x06ff0070 ++ + /* + * Use a suitable undefined instruction to use for ARM/Thumb2 faulting. + * We need to be careful not to conflict with those used by other modules +@@ -135,11 +159,18 @@ + #define _AL3_R(op, rd, rn, rm) ((op ## _R) | (rd) << 12 | (rn) << 16 | (rm)) + /* immediate */ + #define _AL3_I(op, rd, rn, imm) ((op ## _I) | (rd) << 12 | (rn) << 16 | (imm)) ++/* register with register-shift */ ++#define _AL3_SR(inst) (inst | (1 << 4)) + + #define ARM_ADD_R(rd, rn, rm) _AL3_R(ARM_INST_ADD, rd, rn, rm) ++#define ARM_ADDS_R(rd, rn, rm) _AL3_R(ARM_INST_ADDS, rd, rn, rm) + #define ARM_ADD_I(rd, rn, imm) _AL3_I(ARM_INST_ADD, rd, rn, imm) ++#define ARM_ADDS_I(rd, rn, imm) _AL3_I(ARM_INST_ADDS, rd, rn, imm) ++#define ARM_ADC_R(rd, rn, rm) _AL3_R(ARM_INST_ADC, rd, rn, rm) ++#define ARM_ADC_I(rd, rn, imm) _AL3_I(ARM_INST_ADC, rd, rn, imm) + + #define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm) ++#define ARM_ANDS_R(rd, rn, rm) _AL3_R(ARM_INST_ANDS, rd, rn, rm) + #define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm) + + #define ARM_BIC_R(rd, rn, rm) _AL3_R(ARM_INST_BIC, rd, rn, rm) +@@ -155,27 +186,38 @@ + #define ARM_EOR_R(rd, rn, rm) _AL3_R(ARM_INST_EOR, rd, rn, rm) + #define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm) + +-#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \ +- | (off)) +-#define ARM_LDRB_I(rt, rn, off) (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \ +- | (off)) +-#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \ ++#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | ARM_INST_LDST__U \ ++ | (rt) << 12 | (rn) << 16 \ + | (rm)) +-#define ARM_LDRH_I(rt, rn, off) (ARM_INST_LDRH_I | (rt) << 12 | (rn) << 16 \ +- | (((off) & 0xf0) << 4) | ((off) & 0xf)) +-#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | (rt) << 12 | (rn) << 16 \ ++#define ARM_LDR_R_SI(rt, rn, rm, type, imm) \ ++ (ARM_INST_LDR_R | ARM_INST_LDST__U \ ++ | (rt) << 12 | (rn) << 16 \ ++ | (imm) << 7 | (type) << 5 | (rm)) ++#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | ARM_INST_LDST__U \ ++ | (rt) << 12 | (rn) << 16 \ ++ | (rm)) ++#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | ARM_INST_LDST__U 
\ ++ | (rt) << 12 | (rn) << 16 \ + | (rm)) + + #define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs)) ++#define ARM_LDM_IA(rn, regs) (ARM_INST_LDM_IA | (rn) << 16 | (regs)) + + #define ARM_LSL_R(rd, rn, rm) (_AL3_R(ARM_INST_LSL, rd, 0, rn) | (rm) << 8) + #define ARM_LSL_I(rd, rn, imm) (_AL3_I(ARM_INST_LSL, rd, 0, rn) | (imm) << 7) + + #define ARM_LSR_R(rd, rn, rm) (_AL3_R(ARM_INST_LSR, rd, 0, rn) | (rm) << 8) + #define ARM_LSR_I(rd, rn, imm) (_AL3_I(ARM_INST_LSR, rd, 0, rn) | (imm) << 7) ++#define ARM_ASR_R(rd, rn, rm) (_AL3_R(ARM_INST_ASR, rd, 0, rn) | (rm) << 8) ++#define ARM_ASR_I(rd, rn, imm) (_AL3_I(ARM_INST_ASR, rd, 0, rn) | (imm) << 7) + + #define ARM_MOV_R(rd, rm) _AL3_R(ARM_INST_MOV, rd, 0, rm) ++#define ARM_MOVS_R(rd, rm) _AL3_R(ARM_INST_MOVS, rd, 0, rm) + #define ARM_MOV_I(rd, imm) _AL3_I(ARM_INST_MOV, rd, 0, imm) ++#define ARM_MOV_SR(rd, rm, type, rs) \ ++ (_AL3_SR(ARM_MOV_R(rd, rm)) | (type) << 5 | (rs) << 8) ++#define ARM_MOV_SI(rd, rm, type, imm6) \ ++ (ARM_MOV_R(rd, rm) | (type) << 5 | (imm6) << 7) + + #define ARM_MOVW(rd, imm) \ + (ARM_INST_MOVW | ((imm) >> 12) << 16 | (rd) << 12 | ((imm) & 0x0fff)) +@@ -190,19 +232,31 @@ + + #define ARM_ORR_R(rd, rn, rm) _AL3_R(ARM_INST_ORR, rd, rn, rm) + #define ARM_ORR_I(rd, rn, imm) _AL3_I(ARM_INST_ORR, rd, rn, imm) +-#define ARM_ORR_S(rd, rn, rm, type, rs) \ +- (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (rs) << 7) ++#define ARM_ORR_SR(rd, rn, rm, type, rs) \ ++ (_AL3_SR(ARM_ORR_R(rd, rn, rm)) | (type) << 5 | (rs) << 8) ++#define ARM_ORRS_R(rd, rn, rm) _AL3_R(ARM_INST_ORRS, rd, rn, rm) ++#define ARM_ORRS_SR(rd, rn, rm, type, rs) \ ++ (_AL3_SR(ARM_ORRS_R(rd, rn, rm)) | (type) << 5 | (rs) << 8) ++#define ARM_ORR_SI(rd, rn, rm, type, imm6) \ ++ (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (imm6) << 7) ++#define ARM_ORRS_SI(rd, rn, rm, type, imm6) \ ++ (ARM_ORRS_R(rd, rn, rm) | (type) << 5 | (imm6) << 7) + + #define ARM_REV(rd, rm) (ARM_INST_REV | (rd) << 12 | (rm)) + #define ARM_REV16(rd, rm) (ARM_INST_REV16 | (rd) << 12 | (rm)) + + #define ARM_RSB_I(rd, rn, imm) _AL3_I(ARM_INST_RSB, rd, rn, imm) ++#define ARM_RSBS_I(rd, rn, imm) _AL3_I(ARM_INST_RSBS, rd, rn, imm) ++#define ARM_RSC_I(rd, rn, imm) _AL3_I(ARM_INST_RSC, rd, rn, imm) + + #define ARM_SUB_R(rd, rn, rm) _AL3_R(ARM_INST_SUB, rd, rn, rm) ++#define ARM_SUBS_R(rd, rn, rm) _AL3_R(ARM_INST_SUBS, rd, rn, rm) ++#define ARM_RSB_R(rd, rn, rm) _AL3_R(ARM_INST_RSB, rd, rn, rm) ++#define ARM_SBC_R(rd, rn, rm) _AL3_R(ARM_INST_SBC, rd, rn, rm) ++#define ARM_SBCS_R(rd, rn, rm) _AL3_R(ARM_INST_SBCS, rd, rn, rm) + #define ARM_SUB_I(rd, rn, imm) _AL3_I(ARM_INST_SUB, rd, rn, imm) +- +-#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \ +- | (off)) ++#define ARM_SUBS_I(rd, rn, imm) _AL3_I(ARM_INST_SUBS, rd, rn, imm) ++#define ARM_SBC_I(rd, rn, imm) _AL3_I(ARM_INST_SBC, rd, rn, imm) + + #define ARM_TST_R(rn, rm) _AL3_R(ARM_INST_TST, 0, rn, rm) + #define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm) +@@ -214,5 +268,6 @@ + + #define ARM_MLS(rd, rn, rm, ra) (ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \ + | (ra) << 12) ++#define ARM_UXTH(rd, rm) (ARM_INST_UXTH | (rd) << 12 | (rm)) + + #endif /* PFILTER_OPCODES_ARM_H */ +--- a/arch/arm/net/Makefile ++++ b/arch/arm/net/Makefile +@@ -1,3 +1,4 @@ ++# SPDX-License-Identifier: GPL-2.0-only + # ARM-specific networking code + + obj-$(CONFIG_BPF_JIT) += bpf_jit_32.o +--- /dev/null ++++ b/include/linux/bpf-cgroup.h +@@ -0,0 +1,410 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _BPF_CGROUP_H ++#define _BPF_CGROUP_H ++ ++#include 
++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct sock; ++struct sockaddr; ++struct cgroup; ++struct sk_buff; ++struct bpf_map; ++struct bpf_prog; ++struct bpf_sock_ops_kern; ++struct bpf_cgroup_storage; ++struct ctl_table; ++struct ctl_table_header; ++ ++#ifdef CONFIG_CGROUP_BPF ++ ++extern struct static_key_false cgroup_bpf_enabled_key; ++#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) ++ ++DECLARE_PER_CPU(struct bpf_cgroup_storage*, ++ bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); ++ ++#define for_each_cgroup_storage_type(stype) \ ++ for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) ++ ++struct bpf_cgroup_storage_map; ++ ++struct bpf_storage_buffer { ++ struct rcu_head rcu; ++ char data[0]; ++}; ++ ++struct bpf_cgroup_storage { ++ union { ++ struct bpf_storage_buffer *buf; ++ void __percpu *percpu_buf; ++ }; ++ struct bpf_cgroup_storage_map *map; ++ struct bpf_cgroup_storage_key key; ++ struct list_head list; ++ struct rb_node node; ++ struct rcu_head rcu; ++}; ++ ++struct bpf_prog_list { ++ struct list_head node; ++ struct bpf_prog *prog; ++ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; ++}; ++ ++struct bpf_prog_array; ++ ++struct cgroup_bpf { ++ /* array of effective progs in this cgroup */ ++ struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE]; ++ ++ /* attached progs to this cgroup and attach flags ++ * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will ++ * have either zero or one element ++ * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS ++ */ ++ struct list_head progs[MAX_BPF_ATTACH_TYPE]; ++ u32 flags[MAX_BPF_ATTACH_TYPE]; ++ ++ /* temp storage for effective prog array used by prog_attach/detach */ ++ struct bpf_prog_array *inactive; ++ ++ /* reference counter used to detach bpf programs after cgroup removal */ ++ struct percpu_ref refcnt; ++ ++ /* cgroup_bpf is released using a work queue */ ++ struct work_struct release_work; ++}; ++ ++int cgroup_bpf_inherit(struct cgroup *cgrp); ++void cgroup_bpf_offline(struct cgroup *cgrp); ++ ++int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, ++ enum bpf_attach_type type, u32 flags); ++int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, ++ enum bpf_attach_type type); ++int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, ++ union bpf_attr __user *uattr); ++ ++/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ ++int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, ++ enum bpf_attach_type type, u32 flags); ++int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, ++ enum bpf_attach_type type, u32 flags); ++int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, ++ union bpf_attr __user *uattr); ++ ++int __cgroup_bpf_run_filter_skb(struct sock *sk, ++ struct sk_buff *skb, ++ enum bpf_attach_type type); ++ ++int __cgroup_bpf_run_filter_sk(struct sock *sk, ++ enum bpf_attach_type type); ++ ++int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, ++ struct sockaddr *uaddr, ++ enum bpf_attach_type type, ++ void *t_ctx); ++ ++int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, ++ struct bpf_sock_ops_kern *sock_ops, ++ enum bpf_attach_type type); ++ ++int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, ++ short access, enum bpf_attach_type type); ++ ++int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, ++ struct ctl_table *table, int write, ++ void __user *buf, size_t *pcount, ++ 
loff_t *ppos, void **new_buf, ++ enum bpf_attach_type type); ++ ++int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, ++ int *optname, char __user *optval, ++ int *optlen, char **kernel_optval); ++int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ++ int optname, char __user *optval, ++ int __user *optlen, int max_optlen, ++ int retval); ++ ++static inline enum bpf_cgroup_storage_type cgroup_storage_type( ++ struct bpf_map *map) ++{ ++ if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) ++ return BPF_CGROUP_STORAGE_PERCPU; ++ ++ return BPF_CGROUP_STORAGE_SHARED; ++} ++ ++static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage ++ *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) ++{ ++ enum bpf_cgroup_storage_type stype; ++ ++ for_each_cgroup_storage_type(stype) ++ this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); ++} ++ ++struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, ++ enum bpf_cgroup_storage_type stype); ++void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); ++void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, ++ struct cgroup *cgroup, ++ enum bpf_attach_type type); ++void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); ++int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map); ++void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map); ++ ++int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); ++int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, ++ void *value, u64 flags); ++ ++/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ ++#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled) \ ++ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ ++ BPF_CGROUP_INET_INGRESS); \ ++ \ ++ __ret; \ ++}) ++ ++#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ ++ typeof(sk) __sk = sk_to_full_sk(sk); \ ++ if (sk_fullsock(__sk)) \ ++ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ ++ BPF_CGROUP_INET_EGRESS); \ ++ } \ ++ __ret; \ ++}) ++ ++#define BPF_CGROUP_RUN_SK_PROG(sk, type) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled) { \ ++ __ret = __cgroup_bpf_run_filter_sk(sk, type); \ ++ } \ ++ __ret; \ ++}) ++ ++#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ ++ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE) ++ ++#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \ ++ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND) ++ ++#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ ++ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND) ++ ++#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled) \ ++ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ ++ NULL); \ ++ __ret; \ ++}) ++ ++#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled) { \ ++ lock_sock(sk); \ ++ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ ++ t_ctx); \ ++ release_sock(sk); \ ++ } \ ++ __ret; \ ++}) ++ ++#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \ ++ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND) ++ ++#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \ ++ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND) ++ ++#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \ ++ sk->sk_prot->pre_connect) ++ ++#define 
BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ ++ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT) ++ ++#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ ++ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT) ++ ++#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ ++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL) ++ ++#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ ++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL) ++ ++#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ ++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx) ++ ++#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ ++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx) ++ ++#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ ++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL) ++ ++#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ ++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL) ++ ++#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled && (sock_ops)->sk) { \ ++ typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ ++ if (__sk && sk_fullsock(__sk)) \ ++ __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ ++ sock_ops, \ ++ BPF_CGROUP_SOCK_OPS); \ ++ } \ ++ __ret; \ ++}) ++ ++#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled) \ ++ __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \ ++ access, \ ++ BPF_CGROUP_DEVICE); \ ++ \ ++ __ret; \ ++}) ++ ++ ++#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled) \ ++ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ ++ buf, count, pos, nbuf, \ ++ BPF_CGROUP_SYSCTL); \ ++ __ret; \ ++}) ++ ++#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ ++ kernel_optval) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled) \ ++ __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ ++ optname, optval, \ ++ optlen, \ ++ kernel_optval); \ ++ __ret; \ ++}) ++ ++#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ ++({ \ ++ int __ret = 0; \ ++ if (cgroup_bpf_enabled) \ ++ get_user(__ret, optlen); \ ++ __ret; \ ++}) ++ ++#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ ++ max_optlen, retval) \ ++({ \ ++ int __ret = retval; \ ++ if (cgroup_bpf_enabled) \ ++ __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \ ++ optname, optval, \ ++ optlen, max_optlen, \ ++ retval); \ ++ __ret; \ ++}) ++ ++int cgroup_bpf_prog_attach(const union bpf_attr *attr, ++ enum bpf_prog_type ptype, struct bpf_prog *prog); ++int cgroup_bpf_prog_detach(const union bpf_attr *attr, ++ enum bpf_prog_type ptype); ++int cgroup_bpf_prog_query(const union bpf_attr *attr, ++ union bpf_attr __user *uattr); ++#else ++ ++struct bpf_prog; ++struct cgroup_bpf {}; ++static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } ++static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} ++ ++static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, ++ enum bpf_prog_type ptype, ++ struct bpf_prog *prog) ++{ ++ return -EINVAL; ++} ++ ++static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, ++ enum bpf_prog_type ptype) ++{ ++ return -EINVAL; ++} ++ ++static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, 
++ union bpf_attr __user *uattr) ++{ ++ return -EINVAL; ++} ++ ++static inline void bpf_cgroup_storage_set( ++ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} ++static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, ++ struct bpf_map *map) { return 0; } ++static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, ++ struct bpf_map *map) {} ++static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( ++ struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; } ++static inline void bpf_cgroup_storage_free( ++ struct bpf_cgroup_storage *storage) {} ++static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, ++ void *value) { ++ return 0; ++} ++static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, ++ void *key, void *value, u64 flags) { ++ return 0; ++} ++ ++#define cgroup_bpf_enabled (0) ++#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) ++#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) ++#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) ++#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ ++ optlen, max_optlen, retval) ({ retval; }) ++#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ ++ kernel_optval) ({ 0; }) ++ ++#define for_each_cgroup_storage_type(stype) for (; false; ) ++ ++#endif /* CONFIG_CGROUP_BPF */ ++ ++#endif /* _BPF_CGROUP_H */ +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -1,55 +1,183 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of version 2 of the GNU General Public +- * License as published by the Free Software Foundation. 
+ */ + #ifndef _LINUX_BPF_H + #define _LINUX_BPF_H 1 + + #include ++ + #include + #include ++#include ++#include ++#include ++#include ++#include ++#include + ++struct bpf_verifier_env; ++struct perf_event; ++struct bpf_prog; + struct bpf_map; ++struct sock; ++struct seq_file; ++struct btf; ++struct btf_type; ++ ++extern struct idr btf_idr; ++extern spinlock_t btf_idr_lock; + + /* map is generic key/value storage optionally accesible by eBPF programs */ + struct bpf_map_ops { + /* funcs callable from userspace (via syscall) */ ++ int (*map_alloc_check)(union bpf_attr *attr); + struct bpf_map *(*map_alloc)(union bpf_attr *attr); +- void (*map_free)(struct bpf_map *); ++ void (*map_release)(struct bpf_map *map, struct file *map_file); ++ void (*map_free)(struct bpf_map *map); + int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); ++ void (*map_release_uref)(struct bpf_map *map); ++ void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); + + /* funcs callable from userspace and from eBPF programs */ + void *(*map_lookup_elem)(struct bpf_map *map, void *key); + int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); + int (*map_delete_elem)(struct bpf_map *map, void *key); ++ int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); ++ int (*map_pop_elem)(struct bpf_map *map, void *value); ++ int (*map_peek_elem)(struct bpf_map *map, void *value); + + /* funcs called by prog_array and perf_event_array map */ +- void *(*map_fd_get_ptr) (struct bpf_map *map, int fd); +- void (*map_fd_put_ptr) (void *ptr); ++ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, ++ int fd); ++ void (*map_fd_put_ptr)(void *ptr); ++ u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); ++ u32 (*map_fd_sys_lookup_elem)(void *ptr); ++ void (*map_seq_show_elem)(struct bpf_map *map, void *key, ++ struct seq_file *m); ++ int (*map_check_btf)(const struct bpf_map *map, ++ const struct btf *btf, ++ const struct btf_type *key_type, ++ const struct btf_type *value_type); ++ ++ /* Direct value access helpers. */ ++ int (*map_direct_value_addr)(const struct bpf_map *map, ++ u64 *imm, u32 off); ++ int (*map_direct_value_meta)(const struct bpf_map *map, ++ u64 imm, u32 *off); ++}; ++ ++struct bpf_map_memory { ++ u32 pages; ++ struct user_struct *user; + }; + + struct bpf_map { +- atomic_t refcnt; ++ /* The first two cachelines with read-mostly members of which some ++ * are also accessed in fast-path (e.g. ops, max_entries). ++ */ ++ const struct bpf_map_ops *ops ____cacheline_aligned; ++ struct bpf_map *inner_map_meta; ++#ifdef CONFIG_SECURITY ++ void *security; ++#endif + enum bpf_map_type map_type; + u32 key_size; + u32 value_size; + u32 max_entries; +- u32 pages; ++ u32 map_flags; ++ int spin_lock_off; /* >=0 valid offset, <0 error */ ++ u32 id; ++ int numa_node; ++ u32 btf_key_type_id; ++ u32 btf_value_type_id; ++ struct btf *btf; ++ struct bpf_map_memory memory; + bool unpriv_array; +- struct user_struct *user; +- const struct bpf_map_ops *ops; +- struct work_struct work; ++ bool frozen; /* write-once */ ++ /* 48 bytes hole */ ++ ++ /* The 3rd and 4th cacheline with misc members to avoid false sharing ++ * particularly with refcounting. 
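
The layout comments just added to struct bpf_map ("first two cachelines with read-mostly members", refcounts pushed onto later cachelines "to avoid false sharing") describe a conventional hot/cold split. The following is only a generic userspace illustration of that idea using C11 alignas, with made-up field names rather than the kernel's ____cacheline_aligned annotation.

#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>

#define TOY_CACHELINE 64   /* assumed line size; the kernel derives this per architecture */

struct toy_map {
	/* read-mostly: consulted on every lookup, almost never written */
	alignas(TOY_CACHELINE) const void *ops;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;

	/* write-hot: reference counts live on their own line so that bumping
	 * them does not bounce the read-mostly line between CPUs */
	alignas(TOY_CACHELINE) int refcnt;
	int usercnt;
};

int main(void)
{
	printf("ops at offset %zu, refcnt at offset %zu\n",
	       offsetof(struct toy_map, ops),
	       offsetof(struct toy_map, refcnt));
	return 0;
}
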
++ */ ++ atomic_t refcnt ____cacheline_aligned; + atomic_t usercnt; ++ struct work_struct work; ++ char name[BPF_OBJ_NAME_LEN]; + }; + +-struct bpf_map_type_list { +- struct list_head list_node; +- const struct bpf_map_ops *ops; +- enum bpf_map_type type; ++static inline bool map_value_has_spin_lock(const struct bpf_map *map) ++{ ++ return map->spin_lock_off >= 0; ++} ++ ++static inline void check_and_init_map_lock(struct bpf_map *map, void *dst) ++{ ++ if (likely(!map_value_has_spin_lock(map))) ++ return; ++ *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = ++ (struct bpf_spin_lock){}; ++} ++ ++/* copy everything but bpf_spin_lock */ ++static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) ++{ ++ if (unlikely(map_value_has_spin_lock(map))) { ++ u32 off = map->spin_lock_off; ++ ++ memcpy(dst, src, off); ++ memcpy(dst + off + sizeof(struct bpf_spin_lock), ++ src + off + sizeof(struct bpf_spin_lock), ++ map->value_size - off - sizeof(struct bpf_spin_lock)); ++ } else { ++ memcpy(dst, src, map->value_size); ++ } ++} ++void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, ++ bool lock_src); ++ ++struct bpf_offload_dev; ++struct bpf_offloaded_map; ++ ++struct bpf_map_dev_ops { ++ int (*map_get_next_key)(struct bpf_offloaded_map *map, ++ void *key, void *next_key); ++ int (*map_lookup_elem)(struct bpf_offloaded_map *map, ++ void *key, void *value); ++ int (*map_update_elem)(struct bpf_offloaded_map *map, ++ void *key, void *value, u64 flags); ++ int (*map_delete_elem)(struct bpf_offloaded_map *map, void *key); + }; + ++struct bpf_offloaded_map { ++ struct bpf_map map; ++ struct net_device *netdev; ++ const struct bpf_map_dev_ops *dev_ops; ++ void *dev_priv; ++ struct list_head offloads; ++}; ++ ++static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map) ++{ ++ return container_of(map, struct bpf_offloaded_map, map); ++} ++ ++static inline bool bpf_map_offload_neutral(const struct bpf_map *map) ++{ ++ return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; ++} ++ ++static inline bool bpf_map_support_seq_show(const struct bpf_map *map) ++{ ++ return map->btf && map->ops->map_seq_show_elem; ++} ++ ++int map_check_no_btf(const struct bpf_map *map, ++ const struct btf *btf, ++ const struct btf_type *key_type, ++ const struct btf_type *value_type); ++ ++extern const struct bpf_map_ops bpf_map_offload_ops; ++ + /* function argument constraints */ + enum bpf_arg_type { + ARG_DONTCARE = 0, /* unused argument in helper function */ +@@ -60,22 +188,40 @@ enum bpf_arg_type { + ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ + ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ + ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ ++ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ ++ ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ + + /* the following constraints used to prototype bpf_memcmp() and other + * functions that access data on eBPF program stack + */ +- ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ +- ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ ++ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ ++ ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ ++ ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, ++ * helper function must fill all bytes or clear ++ * them in error case. 
++ */ ++ ++ ARG_CONST_SIZE, /* number of bytes accessed from memory */ ++ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ + + ARG_PTR_TO_CTX, /* pointer to context */ + ARG_ANYTHING, /* any (initialized) argument is ok */ ++ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ++ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ ++ ARG_PTR_TO_INT, /* pointer to int */ ++ ARG_PTR_TO_LONG, /* pointer to long */ ++ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ + }; + + /* type of values returned from helper functions */ + enum bpf_return_type { + RET_INTEGER, /* function returns integer */ + RET_VOID, /* function doesn't return anything */ ++ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ + RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ ++ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ ++ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ ++ RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ + }; + + /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs +@@ -85,6 +231,7 @@ enum bpf_return_type { + struct bpf_func_proto { + u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); + bool gpl_only; ++ bool pkt_access; + enum bpf_return_type ret_type; + enum bpf_arg_type arg1_type; + enum bpf_arg_type arg2_type; +@@ -104,35 +251,172 @@ enum bpf_access_type { + BPF_WRITE = 2 + }; + +-struct bpf_prog; ++/* types of values stored in eBPF registers */ ++/* Pointer types represent: ++ * pointer ++ * pointer + imm ++ * pointer + (u16) var ++ * pointer + (u16) var + imm ++ * if (range > 0) then [ptr, ptr + range - off) is safe to access ++ * if (id > 0) means that some 'var' was added ++ * if (off > 0) means that 'imm' was added ++ */ ++enum bpf_reg_type { ++ NOT_INIT = 0, /* nothing was written into register */ ++ SCALAR_VALUE, /* reg doesn't contain a valid pointer */ ++ PTR_TO_CTX, /* reg points to bpf_context */ ++ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ ++ PTR_TO_MAP_VALUE, /* reg points to map element value */ ++ PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ ++ PTR_TO_STACK, /* reg == frame_pointer + offset */ ++ PTR_TO_PACKET_META, /* skb->data - meta_len */ ++ PTR_TO_PACKET, /* reg points to skb->data */ ++ PTR_TO_PACKET_END, /* skb->data + headlen */ ++ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ ++ PTR_TO_SOCKET, /* reg points to struct bpf_sock */ ++ PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ ++ PTR_TO_SOCK_COMMON, /* reg points to sock_common */ ++ PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ ++ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ ++ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ ++ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ ++ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ ++}; ++ ++/* The information passed from prog-specific *_is_valid_access ++ * back to the verifier. 
++ */ ++struct bpf_insn_access_aux { ++ enum bpf_reg_type reg_type; ++ int ctx_field_size; ++}; ++ ++static inline void ++bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) ++{ ++ aux->ctx_field_size = size; ++} ++ ++struct bpf_prog_ops { ++ int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, ++ union bpf_attr __user *uattr); ++}; + + struct bpf_verifier_ops { + /* return eBPF function prototype for verification */ +- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); ++ const struct bpf_func_proto * ++ (*get_func_proto)(enum bpf_func_id func_id, ++ const struct bpf_prog *prog); + + /* return true if 'size' wide access at offset 'off' within bpf_context + * with 'type' (read or write) is allowed + */ +- bool (*is_valid_access)(int off, int size, enum bpf_access_type type); ++ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info); ++ int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, ++ const struct bpf_prog *prog); ++ int (*gen_ld_abs)(const struct bpf_insn *orig, ++ struct bpf_insn *insn_buf); ++ u32 (*convert_ctx_access)(enum bpf_access_type type, ++ const struct bpf_insn *src, ++ struct bpf_insn *dst, ++ struct bpf_prog *prog, u32 *target_size); ++}; + +- u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, +- int src_reg, int ctx_off, +- struct bpf_insn *insn, struct bpf_prog *prog); ++struct bpf_prog_offload_ops { ++ /* verifier basic callbacks */ ++ int (*insn_hook)(struct bpf_verifier_env *env, ++ int insn_idx, int prev_insn_idx); ++ int (*finalize)(struct bpf_verifier_env *env); ++ /* verifier optimization callbacks (called after .finalize) */ ++ int (*replace_insn)(struct bpf_verifier_env *env, u32 off, ++ struct bpf_insn *insn); ++ int (*remove_insns)(struct bpf_verifier_env *env, u32 off, u32 cnt); ++ /* program management callbacks */ ++ int (*prepare)(struct bpf_prog *prog); ++ int (*translate)(struct bpf_prog *prog); ++ void (*destroy)(struct bpf_prog *prog); + }; + +-struct bpf_prog_type_list { +- struct list_head list_node; +- const struct bpf_verifier_ops *ops; +- enum bpf_prog_type type; ++struct bpf_prog_offload { ++ struct bpf_prog *prog; ++ struct net_device *netdev; ++ struct bpf_offload_dev *offdev; ++ void *dev_priv; ++ struct list_head offloads; ++ bool dev_state; ++ bool opt_failed; ++ void *jited_image; ++ u32 jited_len; ++}; ++ ++enum bpf_cgroup_storage_type { ++ BPF_CGROUP_STORAGE_SHARED, ++ BPF_CGROUP_STORAGE_PERCPU, ++ __BPF_CGROUP_STORAGE_MAX ++}; ++ ++#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX ++ ++struct bpf_prog_stats { ++ u64 cnt; ++ u64 nsecs; ++ struct u64_stats_sync syncp; + }; + + struct bpf_prog_aux { + atomic_t refcnt; + u32 used_map_cnt; +- const struct bpf_verifier_ops *ops; ++ u32 max_ctx_offset; ++ u32 max_pkt_offset; ++ u32 max_tp_access; ++ u32 stack_depth; ++ u32 id; ++ u32 func_cnt; /* used by non-func prog as the number of func progs */ ++ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ ++ bool verifier_zext; /* Zero extensions has been inserted by verifier. */ ++ bool offload_requested; ++ struct bpf_prog **func; ++ void *jit_data; /* JIT specific data. 
arch dependent */ ++ struct latch_tree_node ksym_tnode; ++ struct list_head ksym_lnode; ++ const struct bpf_prog_ops *ops; + struct bpf_map **used_maps; + struct bpf_prog *prog; + struct user_struct *user; ++ u64 load_time; /* ns since boottime */ ++ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; ++ char name[BPF_OBJ_NAME_LEN]; ++#ifdef CONFIG_SECURITY ++ void *security; ++#endif ++ struct bpf_prog_offload *offload; ++ struct btf *btf; ++ struct bpf_func_info *func_info; ++ /* bpf_line_info loaded from userspace. linfo->insn_off ++ * has the xlated insn offset. ++ * Both the main and sub prog share the same linfo. ++ * The subprog can access its first linfo by ++ * using the linfo_idx. ++ */ ++ struct bpf_line_info *linfo; ++ /* jited_linfo is the jited addr of the linfo. It has a ++ * one to one mapping to linfo: ++ * jited_linfo[i] is the jited addr for the linfo[i]->insn_off. ++ * Both the main and sub prog share the same jited_linfo. ++ * The subprog can access its first jited_linfo by ++ * using the linfo_idx. ++ */ ++ void **jited_linfo; ++ u32 func_info_cnt; ++ u32 nr_linfo; ++ /* subprog can use linfo_idx to access its first linfo and ++ * jited_linfo. ++ * main prog always has linfo_idx == 0 ++ */ ++ u32 linfo_idx; ++ struct bpf_prog_stats __percpu *stats; + union { + struct work_struct work; + struct rcu_head rcu; +@@ -153,76 +437,688 @@ struct bpf_array { + union { + char value[0] __aligned(8); + void *ptrs[0] __aligned(8); ++ void __percpu *pptrs[0] __aligned(8); + }; + }; ++ ++#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ + #define MAX_TAIL_CALL_CNT 32 + +-u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); +-void bpf_fd_array_map_clear(struct bpf_map *map); ++#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ ++ BPF_F_RDONLY_PROG | \ ++ BPF_F_WRONLY | \ ++ BPF_F_WRONLY_PROG) ++ ++#define BPF_MAP_CAN_READ BIT(0) ++#define BPF_MAP_CAN_WRITE BIT(1) ++ ++static inline u32 bpf_map_flags_to_cap(struct bpf_map *map) ++{ ++ u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); ++ ++ /* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is ++ * not possible. ++ */ ++ if (access_flags & BPF_F_RDONLY_PROG) ++ return BPF_MAP_CAN_READ; ++ else if (access_flags & BPF_F_WRONLY_PROG) ++ return BPF_MAP_CAN_WRITE; ++ else ++ return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE; ++} ++ ++static inline bool bpf_map_flags_access_ok(u32 access_flags) ++{ ++ return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) != ++ (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); ++} ++ ++struct bpf_event_entry { ++ struct perf_event *event; ++ struct file *perf_file; ++ struct file *map_file; ++ struct rcu_head rcu; ++}; ++ + bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); ++int bpf_prog_calc_tag(struct bpf_prog *fp); ++ + const struct bpf_func_proto *bpf_get_trace_printk_proto(void); + ++typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, ++ unsigned long off, unsigned long len); ++typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, ++ const struct bpf_insn *src, ++ struct bpf_insn *dst, ++ struct bpf_prog *prog, ++ u32 *target_size); ++ ++u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, ++ void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); ++ ++/* an array of programs to be executed under rcu_lock. 
++ * ++ * Typical usage: ++ * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN); ++ * ++ * the structure returned by bpf_prog_array_alloc() should be populated ++ * with program pointers and the last pointer must be NULL. ++ * The user has to keep refcnt on the program and make sure the program ++ * is removed from the array before bpf_prog_put(). ++ * The 'struct bpf_prog_array *' should only be replaced with xchg() ++ * since other cpus are walking the array of pointers in parallel. ++ */ ++struct bpf_prog_array_item { ++ struct bpf_prog *prog; ++ struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; ++}; ++ ++struct bpf_prog_array { ++ struct rcu_head rcu; ++ struct bpf_prog_array_item items[0]; ++}; ++ ++struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); ++void bpf_prog_array_free(struct bpf_prog_array *progs); ++int bpf_prog_array_length(struct bpf_prog_array *progs); ++bool bpf_prog_array_is_empty(struct bpf_prog_array *array); ++int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, ++ __u32 __user *prog_ids, u32 cnt); ++ ++void bpf_prog_array_delete_safe(struct bpf_prog_array *progs, ++ struct bpf_prog *old_prog); ++int bpf_prog_array_copy_info(struct bpf_prog_array *array, ++ u32 *prog_ids, u32 request_cnt, ++ u32 *prog_cnt); ++int bpf_prog_array_copy(struct bpf_prog_array *old_array, ++ struct bpf_prog *exclude_prog, ++ struct bpf_prog *include_prog, ++ struct bpf_prog_array **new_array); ++ ++#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \ ++ ({ \ ++ struct bpf_prog_array_item *_item; \ ++ struct bpf_prog *_prog; \ ++ struct bpf_prog_array *_array; \ ++ u32 _ret = 1; \ ++ preempt_disable(); \ ++ rcu_read_lock(); \ ++ _array = rcu_dereference(array); \ ++ if (unlikely(check_non_null && !_array))\ ++ goto _out; \ ++ _item = &_array->items[0]; \ ++ while ((_prog = READ_ONCE(_item->prog))) { \ ++ if (set_cg_storage) \ ++ bpf_cgroup_storage_set(_item->cgroup_storage); \ ++ _ret &= func(_prog, ctx); \ ++ _item++; \ ++ } \ ++_out: \ ++ rcu_read_unlock(); \ ++ preempt_enable(); \ ++ _ret; \ ++ }) ++ ++/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs ++ * so BPF programs can request cwr for TCP packets. ++ * ++ * Current cgroup skb programs can only return 0 or 1 (0 to drop the ++ * packet. This macro changes the behavior so the low order bit ++ * indicates whether the packet should be dropped (0) or not (1) ++ * and the next bit is a congestion notification bit. 
This could be ++ * used by TCP to call tcp_enter_cwr() ++ * ++ * Hence, new allowed return values of CGROUP EGRESS BPF programs are: ++ * 0: drop packet ++ * 1: keep packet ++ * 2: drop packet and cn ++ * 3: keep packet and cn ++ * ++ * This macro then converts it to one of the NET_XMIT or an error ++ * code that is then interpreted as drop packet (and no cn): ++ * 0: NET_XMIT_SUCCESS skb should be transmitted ++ * 1: NET_XMIT_DROP skb should be dropped and cn ++ * 2: NET_XMIT_CN skb should be transmitted and cn ++ * 3: -EPERM skb should be dropped ++ */ ++#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ ++ ({ \ ++ struct bpf_prog_array_item *_item; \ ++ struct bpf_prog *_prog; \ ++ struct bpf_prog_array *_array; \ ++ u32 ret; \ ++ u32 _ret = 1; \ ++ u32 _cn = 0; \ ++ preempt_disable(); \ ++ rcu_read_lock(); \ ++ _array = rcu_dereference(array); \ ++ _item = &_array->items[0]; \ ++ while ((_prog = READ_ONCE(_item->prog))) { \ ++ bpf_cgroup_storage_set(_item->cgroup_storage); \ ++ ret = func(_prog, ctx); \ ++ _ret &= (ret & 1); \ ++ _cn |= (ret & 2); \ ++ _item++; \ ++ } \ ++ rcu_read_unlock(); \ ++ preempt_enable(); \ ++ if (_ret) \ ++ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ ++ else \ ++ _ret = (_cn ? NET_XMIT_DROP : -EPERM); \ ++ _ret; \ ++ }) ++ ++#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ ++ __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true) ++ ++#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ ++ __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false) ++ + #ifdef CONFIG_BPF_SYSCALL +-void bpf_register_prog_type(struct bpf_prog_type_list *tl); +-void bpf_register_map_type(struct bpf_map_type_list *tl); ++DECLARE_PER_CPU(int, bpf_prog_active); ++ ++extern const struct file_operations bpf_map_fops; ++extern const struct file_operations bpf_prog_fops; ++ ++#define BPF_PROG_TYPE(_id, _name) \ ++ extern const struct bpf_prog_ops _name ## _prog_ops; \ ++ extern const struct bpf_verifier_ops _name ## _verifier_ops; ++#define BPF_MAP_TYPE(_id, _ops) \ ++ extern const struct bpf_map_ops _ops; ++#include ++#undef BPF_PROG_TYPE ++#undef BPF_MAP_TYPE ++ ++extern const struct bpf_prog_ops bpf_offload_prog_ops; ++extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; ++extern const struct bpf_verifier_ops xdp_analyzer_ops; + + struct bpf_prog *bpf_prog_get(u32 ufd); +-struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); ++struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, ++ bool attach_drv); ++struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); ++void bpf_prog_sub(struct bpf_prog *prog, int i); ++struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); ++struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); + void bpf_prog_put(struct bpf_prog *prog); +-void bpf_prog_put_rcu(struct bpf_prog *prog); ++int __bpf_prog_charge(struct user_struct *user, u32 pages); ++void __bpf_prog_uncharge(struct user_struct *user, u32 pages); ++ ++void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); ++void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); + + struct bpf_map *bpf_map_get_with_uref(u32 ufd); + struct bpf_map *__bpf_map_get(struct fd f); +-struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref); ++struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); ++struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map, ++ bool uref); + void bpf_map_put_with_uref(struct bpf_map *map); + void bpf_map_put(struct bpf_map *map); ++int 
bpf_map_charge_memlock(struct bpf_map *map, u32 pages); ++void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); ++int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size); ++void bpf_map_charge_finish(struct bpf_map_memory *mem); ++void bpf_map_charge_move(struct bpf_map_memory *dst, ++ struct bpf_map_memory *src); ++void *bpf_map_area_alloc(u64 size, int numa_node); ++void bpf_map_area_free(void *base); ++void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); + + extern int sysctl_unprivileged_bpf_disabled; + +-int bpf_map_new_fd(struct bpf_map *map); ++int bpf_map_new_fd(struct bpf_map *map, int flags); + int bpf_prog_new_fd(struct bpf_prog *prog); + + int bpf_obj_pin_user(u32 ufd, const char __user *pathname); +-int bpf_obj_get_user(const char __user *pathname); ++int bpf_obj_get_user(const char __user *pathname, int flags); ++ ++int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); ++int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); ++int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, ++ u64 flags); ++int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, ++ u64 flags); ++ ++int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); ++ ++int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, ++ void *key, void *value, u64 map_flags); ++int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); ++int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, ++ void *key, void *value, u64 map_flags); ++int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); ++ ++int bpf_get_file_flag(int flags); ++int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size, ++ size_t actual_size); ++ ++/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and ++ * forced to use 'long' read/writes to try to atomically copy long counters. ++ * Best-effort only. No barriers here, since it _will_ race with concurrent ++ * updates from BPF programs. Called from bpf syscall and mostly used with ++ * size 8 or 16 bytes, so ask compiler to inline it. 
++ */ ++static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) ++{ ++ const long *lsrc = src; ++ long *ldst = dst; ++ ++ size /= sizeof(long); ++ while (size--) ++ *ldst++ = *lsrc++; ++} + + /* verify correctness of eBPF program */ +-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); +-#else +-static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) ++int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, ++ union bpf_attr __user *uattr); ++ ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON ++void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); ++#endif ++ ++/* Map specifics */ ++struct xdp_buff; ++struct sk_buff; ++ ++struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); ++struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); ++void __dev_map_flush(struct bpf_map *map); ++int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, ++ struct net_device *dev_rx); ++int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, ++ struct bpf_prog *xdp_prog); ++ ++struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); ++void __cpu_map_flush(struct bpf_map *map); ++int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, ++ struct net_device *dev_rx); ++ ++/* Return map's numa specified by userspace */ ++static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) + { ++ return (attr->map_flags & BPF_F_NUMA_NODE) ? ++ attr->numa_node : NUMA_NO_NODE; + } + ++struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); ++int array_map_alloc_check(union bpf_attr *attr); ++ ++int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, ++ union bpf_attr __user *uattr); ++int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, ++ union bpf_attr __user *uattr); ++int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, ++ const union bpf_attr *kattr, ++ union bpf_attr __user *uattr); ++#else /* !CONFIG_BPF_SYSCALL */ + static inline struct bpf_prog *bpf_prog_get(u32 ufd) + { + return ERR_PTR(-EOPNOTSUPP); + } + ++static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, ++ enum bpf_prog_type type, ++ bool attach_drv) ++{ ++ return ERR_PTR(-EOPNOTSUPP); ++} ++ ++static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, ++ int i) ++{ ++ return ERR_PTR(-EOPNOTSUPP); ++} ++ ++static inline void bpf_prog_sub(struct bpf_prog *prog, int i) ++{ ++} ++ + static inline void bpf_prog_put(struct bpf_prog *prog) + { + } + +-static inline void bpf_prog_put_rcu(struct bpf_prog *prog) ++static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog) ++{ ++ return ERR_PTR(-EOPNOTSUPP); ++} ++ ++static inline struct bpf_prog *__must_check ++bpf_prog_inc_not_zero(struct bpf_prog *prog) ++{ ++ return ERR_PTR(-EOPNOTSUPP); ++} ++ ++static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) ++{ ++ return 0; ++} ++ ++static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) ++{ ++} ++ ++static inline int bpf_obj_get_user(const char __user *pathname, int flags) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, ++ u32 key) ++{ ++ return NULL; ++} ++ ++static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, ++ u32 key) ++{ ++ return NULL; ++} ++ ++static inline void __dev_map_flush(struct bpf_map *map) ++{ ++} ++ ++struct xdp_buff; 
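/*
 * Editor's note -- illustrative sketch only, not part of the patch above.
 * A minimal user-space analogue of the bpf_long_memcpy() helper added above:
 * the value is copied with long-sized loads/stores so each 8-byte counter
 * word is read in a single access (best effort, no barriers), which is how
 * the bpf syscall snapshots 8/16-byte map values that BPF programs may be
 * updating concurrently.  The names demo_long_memcpy and demo_counter are
 * hypothetical.
 */
#include <stdio.h>

static void demo_long_memcpy(void *dst, const void *src, unsigned int size)
{
	const long *lsrc = src;
	long *ldst = dst;

	size /= sizeof(long);		/* size is a multiple of sizeof(long) */
	while (size--)
		*ldst++ = *lsrc++;
}

int main(void)
{
	long demo_counter[2] = { 1234, 56789 };	/* e.g. packet and byte counts */
	long snapshot[2];

	demo_long_memcpy(snapshot, demo_counter, sizeof(demo_counter));
	printf("packets=%ld bytes=%ld\n", snapshot[0], snapshot[1]);
	return 0;
}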
++struct bpf_dtab_netdev; ++ ++static inline ++int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, ++ struct net_device *dev_rx) ++{ ++ return 0; ++} ++ ++struct sk_buff; ++ ++static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, ++ struct sk_buff *skb, ++ struct bpf_prog *xdp_prog) ++{ ++ return 0; ++} ++ ++static inline ++struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) ++{ ++ return NULL; ++} ++ ++static inline void __cpu_map_flush(struct bpf_map *map) ++{ ++} ++ ++static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, ++ struct xdp_buff *xdp, ++ struct net_device *dev_rx) ++{ ++ return 0; ++} ++ ++static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, ++ enum bpf_prog_type type) ++{ ++ return ERR_PTR(-EOPNOTSUPP); ++} ++ ++static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog, ++ const union bpf_attr *kattr, ++ union bpf_attr __user *uattr) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, ++ const union bpf_attr *kattr, ++ union bpf_attr __user *uattr) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, ++ const union bpf_attr *kattr, ++ union bpf_attr __user *uattr) ++{ ++ return -ENOTSUPP; ++} ++#endif /* CONFIG_BPF_SYSCALL */ ++ ++static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, ++ enum bpf_prog_type type) ++{ ++ return bpf_prog_get_type_dev(ufd, type, false); ++} ++ ++bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); ++ ++#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) ++ ++static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) ++{ ++ return aux->offload_requested; ++} ++ ++static inline bool bpf_map_is_dev_bound(struct bpf_map *map) ++{ ++ return false; ++} ++ ++#else ++static inline int bpf_prog_offload_init(struct bpf_prog *prog, ++ union bpf_attr *attr) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) ++{ ++ return false; ++} ++ ++static inline bool bpf_map_is_dev_bound(struct bpf_map *map) ++{ ++ return false; ++} ++ ++#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ ++ ++#if defined(CONFIG_BPF_STREAM_PARSER) ++int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, ++ struct bpf_prog *old, u32 which); ++int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); ++int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); ++#else ++static inline int sock_map_prog_update(struct bpf_map *map, ++ struct bpf_prog *prog, ++ struct bpf_prog *old, u32 which) + { ++ return -EOPNOTSUPP; ++} ++ ++static inline int sock_map_get_from_fd(const union bpf_attr *attr, ++ struct bpf_prog *prog) ++{ ++ return -EINVAL; ++} ++ ++static inline int sock_map_prog_detach(const union bpf_attr *attr, ++ enum bpf_prog_type ptype) ++{ ++ return -EOPNOTSUPP; ++} ++#endif ++ ++#if defined(CONFIG_XDP_SOCKETS) ++struct xdp_sock; ++struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key); ++int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, ++ struct xdp_sock *xs); ++void __xsk_map_flush(struct bpf_map *map); ++#else ++struct xdp_sock; ++static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, ++ u32 key) ++{ ++ return NULL; ++} ++ ++static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, ++ struct xdp_sock *xs) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static inline void 
__xsk_map_flush(struct bpf_map *map) ++{ ++} ++#endif ++ ++#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) ++void bpf_sk_reuseport_detach(struct sock *sk); ++int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, ++ void *value); ++int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, ++ void *value, u64 map_flags); ++#else ++static inline void bpf_sk_reuseport_detach(struct sock *sk) ++{ ++} ++ ++#ifdef CONFIG_BPF_SYSCALL ++static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, ++ void *key, void *value) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, ++ void *key, void *value, ++ u64 map_flags) ++{ ++ return -EOPNOTSUPP; + } + #endif /* CONFIG_BPF_SYSCALL */ ++#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */ + + /* verifier prototypes for helper functions called from eBPF programs */ + extern const struct bpf_func_proto bpf_map_lookup_elem_proto; + extern const struct bpf_func_proto bpf_map_update_elem_proto; + extern const struct bpf_func_proto bpf_map_delete_elem_proto; ++extern const struct bpf_func_proto bpf_map_push_elem_proto; ++extern const struct bpf_func_proto bpf_map_pop_elem_proto; ++extern const struct bpf_func_proto bpf_map_peek_elem_proto; + + extern const struct bpf_func_proto bpf_get_prandom_u32_proto; + extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; ++extern const struct bpf_func_proto bpf_get_numa_node_id_proto; + extern const struct bpf_func_proto bpf_tail_call_proto; + extern const struct bpf_func_proto bpf_ktime_get_ns_proto; + extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; + extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; + extern const struct bpf_func_proto bpf_get_current_comm_proto; +-extern const struct bpf_func_proto bpf_skb_vlan_push_proto; +-extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; ++extern const struct bpf_func_proto bpf_get_stackid_proto; ++extern const struct bpf_func_proto bpf_get_stack_proto; ++extern const struct bpf_func_proto bpf_sock_map_update_proto; ++extern const struct bpf_func_proto bpf_sock_hash_update_proto; ++extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; ++extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; ++extern const struct bpf_func_proto bpf_msg_redirect_map_proto; ++extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; ++extern const struct bpf_func_proto bpf_sk_redirect_map_proto; ++extern const struct bpf_func_proto bpf_spin_lock_proto; ++extern const struct bpf_func_proto bpf_spin_unlock_proto; ++extern const struct bpf_func_proto bpf_get_local_storage_proto; ++extern const struct bpf_func_proto bpf_strtol_proto; ++extern const struct bpf_func_proto bpf_strtoul_proto; ++extern const struct bpf_func_proto bpf_tcp_sock_proto; + + /* Shared helpers among cBPF and eBPF. 
*/ + void bpf_user_rnd_init_once(void); + u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); + ++#if defined(CONFIG_NET) ++bool bpf_sock_common_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ struct bpf_insn_access_aux *info); ++bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, ++ struct bpf_insn_access_aux *info); ++u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, ++ u32 *target_size); ++#else ++static inline bool bpf_sock_common_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++static inline bool bpf_sock_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, ++ u32 *target_size) ++{ ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_INET ++bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, ++ struct bpf_insn_access_aux *info); ++ ++u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, ++ u32 *target_size); ++ ++bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, ++ struct bpf_insn_access_aux *info); ++ ++u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, ++ u32 *target_size); ++#else ++static inline bool bpf_tcp_sock_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, ++ u32 *target_size) ++{ ++ return 0; ++} ++static inline bool bpf_xdp_sock_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, ++ u32 *target_size) ++{ ++ return 0; ++} ++#endif /* CONFIG_INET */ ++ + #endif /* _LINUX_BPF_H */ +--- /dev/null ++++ b/include/linux/bpf_trace.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __LINUX_BPF_TRACE_H__ ++#define __LINUX_BPF_TRACE_H__ ++ ++#include ++ ++#endif /* __LINUX_BPF_TRACE_H__ */ +--- /dev/null ++++ b/include/linux/bpf_types.h +@@ -0,0 +1,44 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* internal file - do not include directly */ ++ ++#ifdef CONFIG_NET ++BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) ++BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) ++BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) ++BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in) ++BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out) ++BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) ++BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local) ++BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) ++BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) ++BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) ++BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) ++#endif ++#ifdef CONFIG_BPF_EVENTS 
++BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) ++BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) ++BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) ++BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) ++BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) ++#endif ++ ++BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) ++#ifdef CONFIG_PERF_EVENTS ++BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) ++#endif ++BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) ++#ifdef CONFIG_NET ++BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) ++#endif ++BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) ++BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) +--- /dev/null ++++ b/include/linux/bpf_verifier.h +@@ -0,0 +1,425 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++ */ ++#ifndef _LINUX_BPF_VERIFIER_H ++#define _LINUX_BPF_VERIFIER_H 1 ++ ++#include /* for enum bpf_reg_type */ ++#include /* for MAX_BPF_STACK */ ++#include ++ ++/* Maximum variable offset umax_value permitted when resolving memory accesses. ++ * In practice this is far bigger than any realistic pointer offset; this limit ++ * ensures that umax_value + (int)off + (int)size cannot overflow a u64. ++ */ ++#define BPF_MAX_VAR_OFF (1 << 29) ++/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures ++ * that converting umax_value to int cannot overflow. ++ */ ++#define BPF_MAX_VAR_SIZ (1 << 29) ++ ++/* Liveness marks, used for registers and spilled-regs (in stack slots). ++ * Read marks propagate upwards until they find a write mark; they record that ++ * "one of this state's descendants read this reg" (and therefore the reg is ++ * relevant for states_equal() checks). ++ * Write marks collect downwards and do not propagate; they record that "the ++ * straight-line code that reached this state (from its parent) wrote this reg" ++ * (and therefore that reads propagated from this state or its descendants ++ * should not propagate to its parent). ++ * A state with a write mark can receive read marks; it just won't propagate ++ * them to its parent, since the write mark is a property, not of the state, ++ * but of the link between it and its parent. See mark_reg_read() and ++ * mark_stack_slot_read() in kernel/bpf/verifier.c. ++ */ ++enum bpf_reg_liveness { ++ REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ ++ REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */ ++ REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */ ++ REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64, ++ REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */ ++ REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ ++}; ++ ++struct bpf_reg_state { ++ /* Ordering of fields matters. 
See states_equal() */ ++ enum bpf_reg_type type; ++ union { ++ /* valid when type == PTR_TO_PACKET */ ++ u16 range; ++ ++ /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | ++ * PTR_TO_MAP_VALUE_OR_NULL ++ */ ++ struct bpf_map *map_ptr; ++ ++ /* Max size from any of the above. */ ++ unsigned long raw; ++ }; ++ /* Fixed part of pointer offset, pointer types only */ ++ s32 off; ++ /* For PTR_TO_PACKET, used to find other pointers with the same variable ++ * offset, so they can share range knowledge. ++ * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we ++ * came from, when one is tested for != NULL. ++ * For PTR_TO_SOCKET this is used to share which pointers retain the ++ * same reference to the socket, to determine proper reference freeing. ++ */ ++ u32 id; ++ /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned ++ * from a pointer-cast helper, bpf_sk_fullsock() and ++ * bpf_tcp_sock(). ++ * ++ * Consider the following where "sk" is a reference counted ++ * pointer returned from "sk = bpf_sk_lookup_tcp();": ++ * ++ * 1: sk = bpf_sk_lookup_tcp(); ++ * 2: if (!sk) { return 0; } ++ * 3: fullsock = bpf_sk_fullsock(sk); ++ * 4: if (!fullsock) { bpf_sk_release(sk); return 0; } ++ * 5: tp = bpf_tcp_sock(fullsock); ++ * 6: if (!tp) { bpf_sk_release(sk); return 0; } ++ * 7: bpf_sk_release(sk); ++ * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain ++ * ++ * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and ++ * "tp" ptr should be invalidated also. In order to do that, ++ * the reg holding "fullsock" and "sk" need to remember ++ * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id ++ * such that the verifier can reset all regs which have ++ * ref_obj_id matching the sk_reg->id. ++ * ++ * sk_reg->ref_obj_id is set to sk_reg->id at line 1. ++ * sk_reg->id will stay as NULL-marking purpose only. ++ * After NULL-marking is done, sk_reg->id can be reset to 0. ++ * ++ * After "fullsock = bpf_sk_fullsock(sk);" at line 3, ++ * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id. ++ * ++ * After "tp = bpf_tcp_sock(fullsock);" at line 5, ++ * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id ++ * which is the same as sk_reg->ref_obj_id. ++ * ++ * From the verifier perspective, if sk, fullsock and tp ++ * are not NULL, they are the same ptr with different ++ * reg->type. In particular, bpf_sk_release(tp) is also ++ * allowed and has the same effect as bpf_sk_release(sk). ++ */ ++ u32 ref_obj_id; ++ /* For scalar types (SCALAR_VALUE), this represents our knowledge of ++ * the actual value. ++ * For pointer types, this represents the variable part of the offset ++ * from the pointed-to object, and is shared with all bpf_reg_states ++ * with the same id as us. ++ */ ++ struct tnum var_off; ++ /* Used to determine if any memory access using this register will ++ * result in a bad access. ++ * These refer to the same value as var_off, not necessarily the actual ++ * contents of the register. ++ */ ++ s64 smin_value; /* minimum possible (s64)value */ ++ s64 smax_value; /* maximum possible (s64)value */ ++ u64 umin_value; /* minimum possible (u64)value */ ++ u64 umax_value; /* maximum possible (u64)value */ ++ /* parentage chain for liveness checking */ ++ struct bpf_reg_state *parent; ++ /* Inside the callee two registers can be both PTR_TO_STACK like ++ * R1=fp-8 and R2=fp-8, but one of them points to this function stack ++ * while another to the caller's stack. 
To differentiate them 'frameno' ++ * is used which is an index in bpf_verifier_state->frame[] array ++ * pointing to bpf_func_state. ++ */ ++ u32 frameno; ++ /* Tracks subreg definition. The stored value is the insn_idx of the ++ * writing insn. This is safe because subreg_def is used before any insn ++ * patching which only happens after main verification finished. ++ */ ++ s32 subreg_def; ++ enum bpf_reg_liveness live; ++ /* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */ ++ bool precise; ++}; ++ ++enum bpf_stack_slot_type { ++ STACK_INVALID, /* nothing was stored in this stack slot */ ++ STACK_SPILL, /* register spilled into stack */ ++ STACK_MISC, /* BPF program wrote some data into this slot */ ++ STACK_ZERO, /* BPF program wrote constant zero */ ++}; ++ ++#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ ++ ++struct bpf_stack_state { ++ struct bpf_reg_state spilled_ptr; ++ u8 slot_type[BPF_REG_SIZE]; ++}; ++ ++struct bpf_reference_state { ++ /* Track each reference created with a unique id, even if the same ++ * instruction creates the reference multiple times (eg, via CALL). ++ */ ++ int id; ++ /* Instruction where the allocation of this reference occurred. This ++ * is used purely to inform the user of a reference leak. ++ */ ++ int insn_idx; ++}; ++ ++/* state of the program: ++ * type of all registers and stack info ++ */ ++struct bpf_func_state { ++ struct bpf_reg_state regs[MAX_BPF_REG]; ++ /* index of call instruction that called into this func */ ++ int callsite; ++ /* stack frame number of this function state from pov of ++ * enclosing bpf_verifier_state. ++ * 0 = main function, 1 = first callee. ++ */ ++ u32 frameno; ++ /* subprog number == index within subprog_stack_depth ++ * zero == main subprog ++ */ ++ u32 subprogno; ++ ++ /* The following fields should be last. See copy_func_state() */ ++ int acquired_refs; ++ struct bpf_reference_state *refs; ++ int allocated_stack; ++ struct bpf_stack_state *stack; ++}; ++ ++struct bpf_idx_pair { ++ u32 prev_idx; ++ u32 idx; ++}; ++ ++#define MAX_CALL_FRAMES 8 ++struct bpf_verifier_state { ++ /* call stack tracking */ ++ struct bpf_func_state *frame[MAX_CALL_FRAMES]; ++ struct bpf_verifier_state *parent; ++ /* ++ * 'branches' field is the number of branches left to explore: ++ * 0 - all possible paths from this state reached bpf_exit or ++ * were safely pruned ++ * 1 - at least one path is being explored. ++ * This state hasn't reached bpf_exit ++ * 2 - at least two paths are being explored. ++ * This state is an immediate parent of two children. ++ * One is fallthrough branch with branches==1 and another ++ * state is pushed into stack (to be explored later) also with ++ * branches==1. The parent of this state has branches==1. ++ * The verifier state tree connected via 'parent' pointer looks like: ++ * 1 ++ * 1 ++ * 2 -> 1 (first 'if' pushed into stack) ++ * 1 ++ * 2 -> 1 (second 'if' pushed into stack) ++ * 1 ++ * 1 ++ * 1 bpf_exit. ++ * ++ * Once do_check() reaches bpf_exit, it calls update_branch_counts() ++ * and the verifier state tree will look: ++ * 1 ++ * 1 ++ * 2 -> 1 (first 'if' pushed into stack) ++ * 1 ++ * 1 -> 1 (second 'if' pushed into stack) ++ * 0 ++ * 0 ++ * 0 bpf_exit. ++ * After pop_stack() the do_check() will resume at second 'if'. ++ * ++ * If is_state_visited() sees a state with branches > 0 it means ++ * there is a loop. If such state is exactly equal to the current state ++ * it's an infinite loop. 
Note states_equal() checks for states ++ * equvalency, so two states being 'states_equal' does not mean ++ * infinite loop. The exact comparison is provided by ++ * states_maybe_looping() function. It's a stronger pre-check and ++ * much faster than states_equal(). ++ * ++ * This algorithm may not find all possible infinite loops or ++ * loop iteration count may be too high. ++ * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in. ++ */ ++ u32 branches; ++ u32 insn_idx; ++ u32 curframe; ++ u32 active_spin_lock; ++ bool speculative; ++ ++ /* first and last insn idx of this verifier state */ ++ u32 first_insn_idx; ++ u32 last_insn_idx; ++ /* jmp history recorded from first to last. ++ * backtracking is using it to go from last to first. ++ * For most states jmp_history_cnt is [0-3]. ++ * For loops can go up to ~40. ++ */ ++ struct bpf_idx_pair *jmp_history; ++ u32 jmp_history_cnt; ++}; ++ ++#define bpf_get_spilled_reg(slot, frame) \ ++ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ ++ (frame->stack[slot].slot_type[0] == STACK_SPILL)) \ ++ ? &frame->stack[slot].spilled_ptr : NULL) ++ ++/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ ++#define bpf_for_each_spilled_reg(iter, frame, reg) \ ++ for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \ ++ iter < frame->allocated_stack / BPF_REG_SIZE; \ ++ iter++, reg = bpf_get_spilled_reg(iter, frame)) ++ ++/* linked list of verifier states used to prune search */ ++struct bpf_verifier_state_list { ++ struct bpf_verifier_state state; ++ struct bpf_verifier_state_list *next; ++ int miss_cnt, hit_cnt; ++}; ++ ++/* Possible states for alu_state member. */ ++#define BPF_ALU_SANITIZE_SRC (1U << 0) ++#define BPF_ALU_SANITIZE_DST (1U << 1) ++#define BPF_ALU_NEG_VALUE (1U << 2) ++#define BPF_ALU_NON_POINTER (1U << 3) ++#define BPF_ALU_IMMEDIATE (1U << 4) ++#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ ++ BPF_ALU_SANITIZE_DST) ++ ++struct bpf_insn_aux_data { ++ union { ++ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ ++ unsigned long map_state; /* pointer/poison value for maps */ ++ s32 call_imm; /* saved imm field of call insn */ ++ u32 alu_limit; /* limit for add/sub register with pointer */ ++ struct { ++ u32 map_index; /* index into used_maps[] */ ++ u32 map_off; /* offset from value base address */ ++ }; ++ }; ++ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ ++ int sanitize_stack_off; /* stack slot to be cleared */ ++ bool seen; /* this insn was processed by the verifier */ ++ bool zext_dst; /* this insn zero extends dst reg */ ++ u8 alu_state; /* used in combination with alu_limit */ ++ bool prune_point; ++ unsigned int orig_idx; /* original instruction index */ ++}; ++ ++#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ ++ ++#define BPF_VERIFIER_TMP_LOG_SIZE 1024 ++ ++struct bpf_verifier_log { ++ u32 level; ++ char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; ++ char __user *ubuf; ++ u32 len_used; ++ u32 len_total; ++}; ++ ++static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) ++{ ++ return log->len_used >= log->len_total - 1; ++} ++ ++#define BPF_LOG_LEVEL1 1 ++#define BPF_LOG_LEVEL2 2 ++#define BPF_LOG_STATS 4 ++#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) ++#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) ++ ++static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) ++{ ++ return log->level && log->ubuf && !bpf_verifier_log_full(log); ++} ++ ++#define BPF_MAX_SUBPROGS 256 ++ 
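/*
 * Editor's note -- illustrative sketch only, not part of the patch above.
 * struct bpf_verifier_log defined above is filled from the BPF_PROG_LOAD
 * attributes: log_level/log_size/log_buf map to log->level, log->len_total
 * and log->ubuf.  The user-space snippet below loads a trivial
 * "r0 = 0; exit" program with a verbose log level so the verifier's
 * per-instruction state dump lands in the supplied buffer.  The sys_bpf()
 * wrapper name is hypothetical and error handling is minimal.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

int main(void)
{
	struct bpf_insn prog[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
		{ .code = BPF_JMP | BPF_EXIT },
	};
	char log[8192] = "";
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insns = (__u64)(unsigned long)prog;
	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
	attr.license = (__u64)(unsigned long)"GPL";
	attr.log_level = 2;			/* verbose per-insn register states */
	attr.log_size = sizeof(log);
	attr.log_buf = (__u64)(unsigned long)log;

	fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
	printf("prog fd=%d\nverifier log:\n%s\n", fd, log);
	return fd < 0;
}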
++struct bpf_subprog_info { ++ u32 start; /* insn idx of function entry point */ ++ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ ++ u16 stack_depth; /* max. stack depth used by this function */ ++ bool has_tail_call; ++}; ++ ++/* single container for all structs ++ * one verifier_env per bpf_check() call ++ */ ++struct bpf_verifier_env { ++ u32 insn_idx; ++ u32 prev_insn_idx; ++ struct bpf_prog *prog; /* eBPF program being verified */ ++ const struct bpf_verifier_ops *ops; ++ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ ++ int stack_size; /* number of states to be processed */ ++ bool strict_alignment; /* perform strict pointer alignment checks */ ++ bool test_state_freq; /* test verifier with different pruning frequency */ ++ struct bpf_verifier_state *cur_state; /* current verifier state */ ++ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ ++ struct bpf_verifier_state_list *free_list; ++ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ ++ u32 used_map_cnt; /* number of used maps */ ++ u32 id_gen; /* used to generate unique reg IDs */ ++ bool allow_ptr_leaks; ++ bool seen_direct_write; ++ struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ ++ const struct bpf_line_info *prev_linfo; ++ struct bpf_verifier_log log; ++ struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; ++ struct { ++ int *insn_state; ++ int *insn_stack; ++ int cur_stack; ++ } cfg; ++ u32 subprog_cnt; ++ /* number of instructions analyzed by the verifier */ ++ u32 prev_insn_processed, insn_processed; ++ /* number of jmps, calls, exits analyzed so far */ ++ u32 prev_jmps_processed, jmps_processed; ++ /* total verification time */ ++ u64 verification_time; ++ /* maximum number of verifier states kept in 'branching' instructions */ ++ u32 max_states_per_insn; ++ /* total number of allocated verifier states */ ++ u32 total_states; ++ /* some states are freed during program analysis. ++ * this is peak number of states. 
this number dominates kernel ++ * memory consumption during verification ++ */ ++ u32 peak_states; ++ /* longest register parentage chain walked for liveness marking */ ++ u32 longest_mark_read_walk; ++}; ++ ++__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, ++ const char *fmt, va_list args); ++__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, ++ const char *fmt, ...); ++ ++static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) ++{ ++ struct bpf_verifier_state *cur = env->cur_state; ++ ++ return cur->frame[cur->curframe]; ++} ++ ++static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) ++{ ++ return cur_func(env)->regs; ++} ++ ++int bpf_prog_offload_verifier_prep(struct bpf_prog *prog); ++int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, ++ int insn_idx, int prev_insn_idx); ++int bpf_prog_offload_finalize(struct bpf_verifier_env *env); ++void ++bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, ++ struct bpf_insn *insn); ++void ++bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); ++ ++#endif /* _LINUX_BPF_VERIFIER_H */ +--- /dev/null ++++ b/include/linux/btf.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright (c) 2018 Facebook */ ++ ++#ifndef _LINUX_BTF_H ++#define _LINUX_BTF_H 1 ++ ++#include ++ ++struct btf; ++struct btf_member; ++struct btf_type; ++union bpf_attr; ++ ++extern const struct file_operations btf_fops; ++ ++void btf_put(struct btf *btf); ++int btf_new_fd(const union bpf_attr *attr); ++struct btf *btf_get_by_fd(int fd); ++int btf_get_info_by_fd(const struct btf *btf, ++ const union bpf_attr *attr, ++ union bpf_attr __user *uattr); ++/* Figure out the size of a type_id. If type_id is a modifier ++ * (e.g. const), it will be resolved to find out the type with size. ++ * ++ * For example: ++ * In describing "const void *", type_id is "const" and "const" ++ * refers to "void *". The return type will be "void *". ++ * ++ * If type_id is a simple "int", then return type will be "int". ++ * ++ * @btf: struct btf object ++ * @type_id: Find out the size of type_id. The type_id of the return ++ * type is set to *type_id. ++ * @ret_size: It can be NULL. If not NULL, the size of the return ++ * type is set to *ret_size. ++ * Return: The btf_type (resolved to another type with size info if needed). ++ * NULL is returned if type_id itself does not have size info ++ * (e.g. void) or it cannot be resolved to another type that ++ * has size info. ++ * *type_id and *ret_size will not be changed in the ++ * NULL return case. 
++ */ ++const struct btf_type *btf_type_id_size(const struct btf *btf, ++ u32 *type_id, ++ u32 *ret_size); ++void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, ++ struct seq_file *m); ++int btf_get_fd_by_id(u32 id); ++u32 btf_id(const struct btf *btf); ++bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, ++ const struct btf_member *m, ++ u32 expected_offset, u32 expected_size); ++int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); ++bool btf_type_is_void(const struct btf_type *t); ++ ++#ifdef CONFIG_BPF_SYSCALL ++const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); ++const char *btf_name_by_offset(const struct btf *btf, u32 offset); ++#else ++static inline const struct btf_type *btf_type_by_id(const struct btf *btf, ++ u32 type_id) ++{ ++ return NULL; ++} ++static inline const char *btf_name_by_offset(const struct btf *btf, ++ u32 offset) ++{ ++ return NULL; ++} ++#endif ++ ++#endif +--- a/include/uapi/linux/bpf_common.h ++++ b/include/uapi/linux/bpf_common.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef _UAPI__LINUX_BPF_COMMON_H__ + #define _UAPI__LINUX_BPF_COMMON_H__ + +@@ -14,9 +15,10 @@ + + /* ld/ldx fields */ + #define BPF_SIZE(code) ((code) & 0x18) +-#define BPF_W 0x00 +-#define BPF_H 0x08 +-#define BPF_B 0x10 ++#define BPF_W 0x00 /* 32-bit */ ++#define BPF_H 0x08 /* 16-bit */ ++#define BPF_B 0x10 /* 8-bit */ ++/* eBPF BPF_DW 0x18 64-bit */ + #define BPF_MODE(code) ((code) & 0xe0) + #define BPF_IMM 0x00 + #define BPF_ABS 0x20 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or +@@ -13,10 +14,11 @@ + /* Extended instruction set based on top of classic BPF */ + + /* instruction classes */ ++#define BPF_JMP32 0x06 /* jmp mode in word width */ + #define BPF_ALU64 0x07 /* alu mode in double word width */ + + /* ld/ldx fields */ +-#define BPF_DW 0x18 /* double word */ ++#define BPF_DW 0x18 /* double word (64-bit) */ + #define BPF_XADD 0xc0 /* exclusive add */ + + /* alu/jmp fields */ +@@ -30,9 +32,14 @@ + #define BPF_FROM_LE BPF_TO_LE + #define BPF_FROM_BE BPF_TO_BE + ++/* jmp encodings */ + #define BPF_JNE 0x50 /* jump != */ ++#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ ++#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ + #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ + #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ ++#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ ++#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ + #define BPF_CALL 0x80 /* function call */ + #define BPF_EXIT 0x90 /* function return */ + +@@ -63,6 +70,17 @@ struct bpf_insn { + __s32 imm; /* signed immediate constant */ + }; + ++/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ ++struct bpf_lpm_trie_key { ++ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ ++ __u8 data[0]; /* Arbitrary size */ ++}; ++ ++struct bpf_cgroup_storage_key { ++ __u64 cgroup_inode_id; /* cgroup inode id */ ++ __u32 attach_type; /* program attach type */ ++}; ++ + /* BPF syscall commands, see bpf(2) man-page for details. 
*/ + enum bpf_cmd { + BPF_MAP_CREATE, +@@ -73,6 +91,22 @@ enum bpf_cmd { + BPF_PROG_LOAD, + BPF_OBJ_PIN, + BPF_OBJ_GET, ++ BPF_PROG_ATTACH, ++ BPF_PROG_DETACH, ++ BPF_PROG_TEST_RUN, ++ BPF_PROG_GET_NEXT_ID, ++ BPF_MAP_GET_NEXT_ID, ++ BPF_PROG_GET_FD_BY_ID, ++ BPF_MAP_GET_FD_BY_ID, ++ BPF_OBJ_GET_INFO_BY_FD, ++ BPF_PROG_QUERY, ++ BPF_RAW_TRACEPOINT_OPEN, ++ BPF_BTF_LOAD, ++ BPF_BTF_GET_FD_BY_ID, ++ BPF_TASK_FD_QUERY, ++ BPF_MAP_LOOKUP_AND_DELETE_ELEM, ++ BPF_MAP_FREEZE, ++ BPF_BTF_GET_NEXT_ID, + }; + + enum bpf_map_type { +@@ -81,22 +115,256 @@ enum bpf_map_type { + BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PROG_ARRAY, + BPF_MAP_TYPE_PERF_EVENT_ARRAY, ++ BPF_MAP_TYPE_PERCPU_HASH, ++ BPF_MAP_TYPE_PERCPU_ARRAY, ++ BPF_MAP_TYPE_STACK_TRACE, ++ BPF_MAP_TYPE_CGROUP_ARRAY, ++ BPF_MAP_TYPE_LRU_HASH, ++ BPF_MAP_TYPE_LRU_PERCPU_HASH, ++ BPF_MAP_TYPE_LPM_TRIE, ++ BPF_MAP_TYPE_ARRAY_OF_MAPS, ++ BPF_MAP_TYPE_HASH_OF_MAPS, ++ BPF_MAP_TYPE_DEVMAP, ++ BPF_MAP_TYPE_SOCKMAP, ++ BPF_MAP_TYPE_CPUMAP, ++ BPF_MAP_TYPE_XSKMAP, ++ BPF_MAP_TYPE_SOCKHASH, ++ BPF_MAP_TYPE_CGROUP_STORAGE, ++ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, ++ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, ++ BPF_MAP_TYPE_QUEUE, ++ BPF_MAP_TYPE_STACK, ++ BPF_MAP_TYPE_SK_STORAGE, ++ BPF_MAP_TYPE_DEVMAP_HASH, + }; + ++/* Note that tracing related programs such as ++ * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} ++ * are not subject to a stable API since kernel internal data ++ * structures can change from release to release and may ++ * therefore break existing tracing BPF programs. Tracing BPF ++ * programs correspond to /a/ specific kernel which is to be ++ * analyzed, and not /a/ specific kernel /and/ all future ones. ++ */ + enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, + BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, ++ BPF_PROG_TYPE_TRACEPOINT, ++ BPF_PROG_TYPE_XDP, ++ BPF_PROG_TYPE_PERF_EVENT, ++ BPF_PROG_TYPE_CGROUP_SKB, ++ BPF_PROG_TYPE_CGROUP_SOCK, ++ BPF_PROG_TYPE_LWT_IN, ++ BPF_PROG_TYPE_LWT_OUT, ++ BPF_PROG_TYPE_LWT_XMIT, ++ BPF_PROG_TYPE_SOCK_OPS, ++ BPF_PROG_TYPE_SK_SKB, ++ BPF_PROG_TYPE_CGROUP_DEVICE, ++ BPF_PROG_TYPE_SK_MSG, ++ BPF_PROG_TYPE_RAW_TRACEPOINT, ++ BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ++ BPF_PROG_TYPE_LWT_SEG6LOCAL, ++ BPF_PROG_TYPE_LIRC_MODE2, ++ BPF_PROG_TYPE_SK_REUSEPORT, ++ BPF_PROG_TYPE_FLOW_DISSECTOR, ++ BPF_PROG_TYPE_CGROUP_SYSCTL, ++ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, ++ BPF_PROG_TYPE_CGROUP_SOCKOPT, + }; + ++enum bpf_attach_type { ++ BPF_CGROUP_INET_INGRESS, ++ BPF_CGROUP_INET_EGRESS, ++ BPF_CGROUP_INET_SOCK_CREATE, ++ BPF_CGROUP_SOCK_OPS, ++ BPF_SK_SKB_STREAM_PARSER, ++ BPF_SK_SKB_STREAM_VERDICT, ++ BPF_CGROUP_DEVICE, ++ BPF_SK_MSG_VERDICT, ++ BPF_CGROUP_INET4_BIND, ++ BPF_CGROUP_INET6_BIND, ++ BPF_CGROUP_INET4_CONNECT, ++ BPF_CGROUP_INET6_CONNECT, ++ BPF_CGROUP_INET4_POST_BIND, ++ BPF_CGROUP_INET6_POST_BIND, ++ BPF_CGROUP_UDP4_SENDMSG, ++ BPF_CGROUP_UDP6_SENDMSG, ++ BPF_LIRC_MODE2, ++ BPF_FLOW_DISSECTOR, ++ BPF_CGROUP_SYSCTL, ++ BPF_CGROUP_UDP4_RECVMSG, ++ BPF_CGROUP_UDP6_RECVMSG, ++ BPF_CGROUP_GETSOCKOPT, ++ BPF_CGROUP_SETSOCKOPT, ++ __MAX_BPF_ATTACH_TYPE ++}; ++ ++#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE ++ ++/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command ++ * ++ * NONE(default): No further bpf programs allowed in the subtree. ++ * ++ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, ++ * the program in this cgroup yields to sub-cgroup program. 
++ * ++ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, ++ * that cgroup program gets run in addition to the program in this cgroup. ++ * ++ * Only one program is allowed to be attached to a cgroup with ++ * NONE or BPF_F_ALLOW_OVERRIDE flag. ++ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will ++ * release old program and attach the new one. Attach flags has to match. ++ * ++ * Multiple programs are allowed to be attached to a cgroup with ++ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order ++ * (those that were attached first, run first) ++ * The programs of sub-cgroup are executed first, then programs of ++ * this cgroup and then programs of parent cgroup. ++ * When children program makes decision (like picking TCP CA or sock bind) ++ * parent program has a chance to override it. ++ * ++ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. ++ * A cgroup with NONE doesn't allow any programs in sub-cgroups. ++ * Ex1: ++ * cgrp1 (MULTI progs A, B) -> ++ * cgrp2 (OVERRIDE prog C) -> ++ * cgrp3 (MULTI prog D) -> ++ * cgrp4 (OVERRIDE prog E) -> ++ * cgrp5 (NONE prog F) ++ * the event in cgrp5 triggers execution of F,D,A,B in that order. ++ * if prog F is detached, the execution is E,D,A,B ++ * if prog F and D are detached, the execution is E,A,B ++ * if prog F, E and D are detached, the execution is C,A,B ++ * ++ * All eligible programs are executed regardless of return code from ++ * earlier programs. ++ */ ++#define BPF_F_ALLOW_OVERRIDE (1U << 0) ++#define BPF_F_ALLOW_MULTI (1U << 1) ++ ++/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the ++ * verifier will perform strict alignment checking as if the kernel ++ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, ++ * and NET_IP_ALIGN defined to 2. ++ */ ++#define BPF_F_STRICT_ALIGNMENT (1U << 0) ++ ++/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the ++ * verifier will allow any alignment whatsoever. On platforms ++ * with strict alignment requirements for loads ands stores (such ++ * as sparc and mips) the verifier validates that all loads and ++ * stores provably follow this requirement. This flag turns that ++ * checking and enforcement off. ++ * ++ * It is mostly used for testing when we want to validate the ++ * context and memory access aspects of the verifier, but because ++ * of an unaligned access the alignment check would trigger before ++ * the one we are interested in. ++ */ ++#define BPF_F_ANY_ALIGNMENT (1U << 1) ++ ++/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose. ++ * Verifier does sub-register def/use analysis and identifies instructions whose ++ * def only matters for low 32-bit, high 32-bit is never referenced later ++ * through implicit zero extension. Therefore verifier notifies JIT back-ends ++ * that it is safe to ignore clearing high 32-bit for these instructions. This ++ * saves some back-ends a lot of code-gen. However such optimization is not ++ * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends ++ * hence hasn't used verifier's analysis result. But, we really want to have a ++ * way to be able to verify the correctness of the described optimization on ++ * x86_64 on which testsuites are frequently exercised. ++ * ++ * So, this flag is introduced. Once it is set, verifier will randomize high ++ * 32-bit for those instructions who has been identified as safe to ignore them. 
++ * Then, if verifier is not doing correct analysis, such randomization will ++ * regress tests to expose bugs. ++ */ ++#define BPF_F_TEST_RND_HI32 (1U << 2) ++ ++/* The verifier internal test flag. Behavior is undefined */ ++#define BPF_F_TEST_STATE_FREQ (1U << 3) ++ ++/* When BPF ldimm64's insn[0].src_reg != 0 then this can have ++ * two extensions: ++ * ++ * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE ++ * insn[0].imm: map fd map fd ++ * insn[1].imm: 0 offset into value ++ * insn[0].off: 0 0 ++ * insn[1].off: 0 0 ++ * ldimm64 rewrite: address of map address of map[0]+offset ++ * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE ++ */ + #define BPF_PSEUDO_MAP_FD 1 ++#define BPF_PSEUDO_MAP_VALUE 2 ++ ++/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative ++ * offset to another bpf function ++ */ ++#define BPF_PSEUDO_CALL 1 + + /* flags for BPF_MAP_UPDATE_ELEM command */ + #define BPF_ANY 0 /* create new element or update existing */ + #define BPF_NOEXIST 1 /* create new element if it didn't exist */ + #define BPF_EXIST 2 /* update existing element */ ++#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ ++ ++/* flags for BPF_MAP_CREATE command */ ++#define BPF_F_NO_PREALLOC (1U << 0) ++/* Instead of having one common LRU list in the ++ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list ++ * which can scale and perform better. ++ * Note, the LRU nodes (including free nodes) cannot be moved ++ * across different LRU lists. ++ */ ++#define BPF_F_NO_COMMON_LRU (1U << 1) ++/* Specify numa node during map creation */ ++#define BPF_F_NUMA_NODE (1U << 2) ++ ++#define BPF_OBJ_NAME_LEN 16U ++ ++/* Flags for accessing BPF object from syscall side. */ ++#define BPF_F_RDONLY (1U << 3) ++#define BPF_F_WRONLY (1U << 4) ++ ++/* Flag for stack_map, store build_id+offset instead of pointer */ ++#define BPF_F_STACK_BUILD_ID (1U << 5) ++ ++/* Zero-initialize hash function seed. This should only be used for testing. */ ++#define BPF_F_ZERO_SEED (1U << 6) ++ ++/* Flags for accessing BPF object from program side. */ ++#define BPF_F_RDONLY_PROG (1U << 7) ++#define BPF_F_WRONLY_PROG (1U << 8) ++ ++/* Clone map from listener for newly accepted socket */ ++#define BPF_F_CLONE (1U << 9) ++ ++/* flags for BPF_PROG_QUERY */ ++#define BPF_F_QUERY_EFFECTIVE (1U << 0) ++ ++enum bpf_stack_build_id_status { ++ /* user space need an empty entry to identify end of a trace */ ++ BPF_STACK_BUILD_ID_EMPTY = 0, ++ /* with valid build_id and offset */ ++ BPF_STACK_BUILD_ID_VALID = 1, ++ /* couldn't get build_id, fallback to ip */ ++ BPF_STACK_BUILD_ID_IP = 2, ++}; ++ ++#define BPF_BUILD_ID_SIZE 20 ++struct bpf_stack_build_id { ++ __s32 status; ++ unsigned char build_id[BPF_BUILD_ID_SIZE]; ++ union { ++ __u64 offset; ++ __u64 ip; ++ }; ++}; + + union bpf_attr { + struct { /* anonymous struct used by BPF_MAP_CREATE command */ +@@ -104,6 +372,18 @@ union bpf_attr { + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ ++ __u32 map_flags; /* BPF_MAP_CREATE related ++ * flags defined above. ++ */ ++ __u32 inner_map_fd; /* fd pointing to the inner map */ ++ __u32 numa_node; /* numa node (effective only if ++ * BPF_F_NUMA_NODE is set). 
++ */ ++ char map_name[BPF_OBJ_NAME_LEN]; ++ __u32 map_ifindex; /* ifindex of netdev to create on */ ++ __u32 btf_fd; /* fd pointing to a BTF type data */ ++ __u32 btf_key_type_id; /* BTF type_id of the key */ ++ __u32 btf_value_type_id; /* BTF type_id of the value */ + }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ +@@ -124,154 +404,2568 @@ union bpf_attr { + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ +- __u32 kern_version; /* checked when prog_type=kprobe */ ++ __u32 kern_version; /* not used */ ++ __u32 prog_flags; ++ char prog_name[BPF_OBJ_NAME_LEN]; ++ __u32 prog_ifindex; /* ifindex of netdev to prep for */ ++ /* For some prog types expected attach type must be known at ++ * load time to verify attach type specific parts of prog ++ * (context accesses, allowed helpers, etc). ++ */ ++ __u32 expected_attach_type; ++ __u32 prog_btf_fd; /* fd pointing to BTF type data */ ++ __u32 func_info_rec_size; /* userspace bpf_func_info size */ ++ __aligned_u64 func_info; /* func info */ ++ __u32 func_info_cnt; /* number of bpf_func_info records */ ++ __u32 line_info_rec_size; /* userspace bpf_line_info size */ ++ __aligned_u64 line_info; /* line info */ ++ __u32 line_info_cnt; /* number of bpf_line_info records */ + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ + __aligned_u64 pathname; + __u32 bpf_fd; ++ __u32 file_flags; ++ }; ++ ++ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ ++ __u32 target_fd; /* container object to attach to */ ++ __u32 attach_bpf_fd; /* eBPF program to attach */ ++ __u32 attach_type; ++ __u32 attach_flags; ++ }; ++ ++ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ ++ __u32 prog_fd; ++ __u32 retval; ++ __u32 data_size_in; /* input: len of data_in */ ++ __u32 data_size_out; /* input/output: len of data_out ++ * returns ENOSPC if data_out ++ * is too small. ++ */ ++ __aligned_u64 data_in; ++ __aligned_u64 data_out; ++ __u32 repeat; ++ __u32 duration; ++ __u32 ctx_size_in; /* input: len of ctx_in */ ++ __u32 ctx_size_out; /* input/output: len of ctx_out ++ * returns ENOSPC if ctx_out ++ * is too small. 
++ */ ++ __aligned_u64 ctx_in; ++ __aligned_u64 ctx_out; ++ } test; ++ ++ struct { /* anonymous struct used by BPF_*_GET_*_ID */ ++ union { ++ __u32 start_id; ++ __u32 prog_id; ++ __u32 map_id; ++ __u32 btf_id; ++ }; ++ __u32 next_id; ++ __u32 open_flags; + }; ++ ++ struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ ++ __u32 bpf_fd; ++ __u32 info_len; ++ __aligned_u64 info; ++ } info; ++ ++ struct { /* anonymous struct used by BPF_PROG_QUERY command */ ++ __u32 target_fd; /* container object to query */ ++ __u32 attach_type; ++ __u32 query_flags; ++ __u32 attach_flags; ++ __aligned_u64 prog_ids; ++ __u32 prog_cnt; ++ } query; ++ ++ struct { ++ __u64 name; ++ __u32 prog_fd; ++ } raw_tracepoint; ++ ++ struct { /* anonymous struct for BPF_BTF_LOAD */ ++ __aligned_u64 btf; ++ __aligned_u64 btf_log_buf; ++ __u32 btf_size; ++ __u32 btf_log_size; ++ __u32 btf_log_level; ++ }; ++ ++ struct { ++ __u32 pid; /* input: pid */ ++ __u32 fd; /* input: fd */ ++ __u32 flags; /* input: flags */ ++ __u32 buf_len; /* input/output: buf len */ ++ __aligned_u64 buf; /* input/output: ++ * tp_name for tracepoint ++ * symbol for kprobe ++ * filename for uprobe ++ */ ++ __u32 prog_id; /* output: prod_id */ ++ __u32 fd_type; /* output: BPF_FD_TYPE_* */ ++ __u64 probe_offset; /* output: probe_offset */ ++ __u64 probe_addr; /* output: probe_addr */ ++ } task_fd_query; + } __attribute__((aligned(8))); + ++/* The description below is an attempt at providing documentation to eBPF ++ * developers about the multiple available eBPF helper functions. It can be ++ * parsed and used to produce a manual page. The workflow is the following, ++ * and requires the rst2man utility: ++ * ++ * $ ./scripts/bpf_helpers_doc.py \ ++ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst ++ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 ++ * $ man /tmp/bpf-helpers.7 ++ * ++ * Note that in order to produce this external documentation, some RST ++ * formatting is used in the descriptions to get "bold" and "italics" in ++ * manual pages. Also note that the few trailing white spaces are ++ * intentional, removing them would break paragraphs for rst2man. ++ * ++ * Start of BPF helper function descriptions: ++ * ++ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) ++ * Description ++ * Perform a lookup in *map* for an entry associated to *key*. ++ * Return ++ * Map value associated to *key*, or **NULL** if no entry was ++ * found. ++ * ++ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) ++ * Description ++ * Add or update the value of the entry associated to *key* in ++ * *map* with *value*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * Flag value **BPF_NOEXIST** cannot be used for maps of types ++ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all ++ * elements always exist), the helper would return an error. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_map_delete_elem(struct bpf_map *map, const void *key) ++ * Description ++ * Delete entry with *key* from *map*. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
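/*
 * Editor's note -- illustrative sketch only, not part of the patch above.
 * The map-related bpf_attr fields and the BPF_ANY/BPF_NOEXIST/BPF_EXIST
 * update flags described in this hunk, exercised from user space through the
 * raw bpf(2) syscall.  The sys_bpf() wrapper name is hypothetical and error
 * handling is minimal.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

int main(void)
{
	union bpf_attr attr;
	__u32 key = 1;
	__u64 val = 42, out = 0;
	long map_fd;

	/* BPF_MAP_CREATE: a small hash map (key: u32, value: u64) */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH;
	attr.key_size = sizeof(key);
	attr.value_size = sizeof(val);
	attr.max_entries = 16;
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map_fd < 0)
		return 1;

	/* BPF_MAP_UPDATE_ELEM with BPF_NOEXIST: only insert, never overwrite */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.value = (__u64)(unsigned long)&val;
	attr.flags = BPF_NOEXIST;
	if (sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)))
		return 1;

	/* BPF_MAP_LOOKUP_ELEM copies the value back through attr.value */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.value = (__u64)(unsigned long)&out;
	if (sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)))
		return 1;

	printf("value=%llu\n", (unsigned long long)out);
	return 0;
}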
++ * ++ * int bpf_probe_read(void *dst, u32 size, const void *src) ++ * Description ++ * For tracing programs, safely attempt to read *size* bytes from ++ * address *src* and store the data in *dst*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_ktime_get_ns(void) ++ * Description ++ * Return the time elapsed since system boot, in nanoseconds. ++ * Return ++ * Current *ktime*. ++ * ++ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) ++ * Description ++ * This helper is a "printk()-like" facility for debugging. It ++ * prints a message defined by format *fmt* (of size *fmt_size*) ++ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if ++ * available. It can take up to three additional **u64** ++ * arguments (as an eBPF helpers, the total number of arguments is ++ * limited to five). ++ * ++ * Each time the helper is called, it appends a line to the trace. ++ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is ++ * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. ++ * The format of the trace is customizable, and the exact output ++ * one will get depends on the options set in ++ * *\/sys/kernel/debug/tracing/trace_options* (see also the ++ * *README* file under the same directory). However, it usually ++ * defaults to something like: ++ * ++ * :: ++ * ++ * telnet-470 [001] .N.. 419421.045894: 0x00000001: ++ * ++ * In the above: ++ * ++ * * ``telnet`` is the name of the current task. ++ * * ``470`` is the PID of the current task. ++ * * ``001`` is the CPU number on which the task is ++ * running. ++ * * In ``.N..``, each character refers to a set of ++ * options (whether irqs are enabled, scheduling ++ * options, whether hard/softirqs are running, level of ++ * preempt_disabled respectively). **N** means that ++ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** ++ * are set. ++ * * ``419421.045894`` is a timestamp. ++ * * ``0x00000001`` is a fake value used by BPF for the ++ * instruction pointer register. ++ * * ```` is the message formatted with ++ * *fmt*. ++ * ++ * The conversion specifiers supported by *fmt* are similar, but ++ * more limited than for printk(). They are **%d**, **%i**, ++ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, ++ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size ++ * of field, padding with zeroes, etc.) is available, and the ++ * helper will return **-EINVAL** (but print nothing) if it ++ * encounters an unknown specifier. ++ * ++ * Also, note that **bpf_trace_printk**\ () is slow, and should ++ * only be used for debugging purposes. For this reason, a notice ++ * bloc (spanning several lines) is printed to kernel logs and ++ * states that the helper should not be used "for production use" ++ * the first time this helper is used (or more precisely, when ++ * **trace_printk**\ () buffers are allocated). For passing values ++ * to user space, perf events should be preferred. ++ * Return ++ * The number of bytes written to the buffer, or a negative error ++ * in case of failure. ++ * ++ * u32 bpf_get_prandom_u32(void) ++ * Description ++ * Get a pseudo-random number. ++ * ++ * From a security point of view, this helper uses its own ++ * pseudo-random internal state, and cannot be used to infer the ++ * seed of other random functions in the kernel. However, it is ++ * essential to note that the generator used by the helper is not ++ * cryptographically secure. ++ * Return ++ * A random 32-bit unsigned value. 
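/*
 * Editor's note -- illustrative sketch only, not part of the patch above.
 * A minimal BPF program using two of the helpers documented in this block,
 * bpf_map_lookup_elem() and bpf_trace_printk().  It assumes a libbpf-style
 * build with bpf_helpers.h; the map name exec_count and the tracepoint
 * section chosen here are illustrative.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} exec_count SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_execve")
int count_execve(void *ctx)
{
	char fmt[] = "execve count: %llu\n";	/* appears in trace_pipe */
	__u32 key = 0;
	__u64 *val;

	val = bpf_map_lookup_elem(&exec_count, &key);	/* NULL if no entry */
	if (val) {
		__sync_fetch_and_add(val, 1);
		bpf_trace_printk(fmt, sizeof(fmt), *val);
	}
	return 0;
}

char LICENSE[] SEC("license") = "GPL";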
++ * ++ * u32 bpf_get_smp_processor_id(void) ++ * Description ++ * Get the SMP (symmetric multiprocessing) processor id. Note that ++ * all programs run with preemption disabled, which means that the ++ * SMP processor id is stable during all the execution of the ++ * program. ++ * Return ++ * The SMP id of the processor running the program. ++ * ++ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) ++ * Description ++ * Store *len* bytes from address *from* into the packet ++ * associated to *skb*, at *offset*. *flags* are a combination of ++ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the ++ * checksum for the packet after storing the bytes) and ++ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ ++ * **->swhash** and *skb*\ **->l4hash** to 0). ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) ++ * Description ++ * Recompute the layer 3 (e.g. IP) checksum for the packet ++ * associated to *skb*. Computation is incremental, so the helper ++ * must know the former value of the header field that was ++ * modified (*from*), the new value of this field (*to*), and the ++ * number of bytes (2 or 4) for this field, stored in *size*. ++ * Alternatively, it is possible to store the difference between ++ * the previous and the new values of the header field in *to*, by ++ * setting *from* and *size* to 0. For both methods, *offset* ++ * indicates the location of the IP checksum within the packet. ++ * ++ * This helper works in combination with **bpf_csum_diff**\ (), ++ * which does not update the checksum in-place, but offers more ++ * flexibility and can handle sizes larger than 2 or 4 for the ++ * checksum to update. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) ++ * Description ++ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the ++ * packet associated to *skb*. Computation is incremental, so the ++ * helper must know the former value of the header field that was ++ * modified (*from*), the new value of this field (*to*), and the ++ * number of bytes (2 or 4) for this field, stored on the lowest ++ * four bits of *flags*. Alternatively, it is possible to store ++ * the difference between the previous and the new values of the ++ * header field in *to*, by setting *from* and the four lowest ++ * bits of *flags* to 0. For both methods, *offset* indicates the ++ * location of the IP checksum within the packet. In addition to ++ * the size of the field, *flags* can be added (bitwise OR) actual ++ * flags. 
With **BPF_F_MARK_MANGLED_0**, a null checksum is left ++ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and ++ * for updates resulting in a null checksum the value is set to ++ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates ++ * the checksum is to be computed against a pseudo-header. ++ * ++ * This helper works in combination with **bpf_csum_diff**\ (), ++ * which does not update the checksum in-place, but offers more ++ * flexibility and can handle sizes larger than 2 or 4 for the ++ * checksum to update. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) ++ * Description ++ * This special helper is used to trigger a "tail call", or in ++ * other words, to jump into another eBPF program. The same stack ++ * frame is used (but values on stack and in registers for the ++ * caller are not accessible to the callee). This mechanism allows ++ * for program chaining, either for raising the maximum number of ++ * available eBPF instructions, or to execute given programs in ++ * conditional blocks. For security reasons, there is an upper ++ * limit to the number of successive tail calls that can be ++ * performed. ++ * ++ * Upon call of this helper, the program attempts to jump into a ++ * program referenced at index *index* in *prog_array_map*, a ++ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes ++ * *ctx*, a pointer to the context. ++ * ++ * If the call succeeds, the kernel immediately runs the first ++ * instruction of the new program. This is not a function call, ++ * and it never returns to the previous program. If the call ++ * fails, then the helper has no effect, and the caller continues ++ * to run its subsequent instructions. A call can fail if the ++ * destination program for the jump does not exist (i.e. *index* ++ * is superior to the number of entries in *prog_array_map*), or ++ * if the maximum number of tail calls has been reached for this ++ * chain of programs. This limit is defined in the kernel by the ++ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), ++ * which is currently set to 32. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) ++ * Description ++ * Clone and redirect the packet associated to *skb* to another ++ * net device of index *ifindex*. Both ingress and egress ++ * interfaces can be used for redirection. The **BPF_F_INGRESS** ++ * value in *flags* is used to make the distinction (ingress path ++ * is selected if the flag is present, egress path otherwise). ++ * This is the only flag supported for now. ++ * ++ * In comparison with **bpf_redirect**\ () helper, ++ * **bpf_clone_redirect**\ () has the associated cost of ++ * duplicating the packet buffer, but this can be executed out of ++ * the eBPF program. Conversely, **bpf_redirect**\ () is more ++ * efficient, but it is handled through an action code where the ++ * redirection happens only after the eBPF program has returned. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. 
Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_get_current_pid_tgid(void) ++ * Return ++ * A 64-bit integer containing the current tgid and pid, and ++ * created as such: ++ * *current_task*\ **->tgid << 32 \|** ++ * *current_task*\ **->pid**. ++ * ++ * u64 bpf_get_current_uid_gid(void) ++ * Return ++ * A 64-bit integer containing the current GID and UID, and ++ * created as such: *current_gid* **<< 32 \|** *current_uid*. ++ * ++ * int bpf_get_current_comm(char *buf, u32 size_of_buf) ++ * Description ++ * Copy the **comm** attribute of the current task into *buf* of ++ * *size_of_buf*. The **comm** attribute contains the name of ++ * the executable (excluding the path) for the current task. The ++ * *size_of_buf* must be strictly positive. On success, the ++ * helper makes sure that the *buf* is NUL-terminated. On failure, ++ * it is filled with zeroes. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u32 bpf_get_cgroup_classid(struct sk_buff *skb) ++ * Description ++ * Retrieve the classid for the current task, i.e. for the net_cls ++ * cgroup to which *skb* belongs. ++ * ++ * This helper can be used on TC egress path, but not on ingress. ++ * ++ * The net_cls cgroup provides an interface to tag network packets ++ * based on a user-provided identifier for all traffic coming from ++ * the tasks belonging to the related cgroup. See also the related ++ * kernel documentation, available from the Linux sources in file ++ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. ++ * ++ * The Linux kernel has two versions for cgroups: there are ++ * cgroups v1 and cgroups v2. Both are available to users, who can ++ * use a mixture of them, but note that the net_cls cgroup is for ++ * cgroup v1 only. This makes it incompatible with BPF programs ++ * run on cgroups, which is a cgroup-v2-only feature (a socket can ++ * only hold data for one version of cgroups at a time). ++ * ++ * This helper is only available if the kernel was compiled with ++ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to ++ * "**y**" or to "**m**". ++ * Return ++ * The classid, or 0 for the default unconfigured classid. ++ * ++ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) ++ * Description ++ * Push a *vlan_tci* (VLAN tag control information) of protocol ++ * *vlan_proto* to the packet associated to *skb*, then update ++ * the checksum. Note that if *vlan_proto* is different from ++ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to ++ * be **ETH_P_8021Q**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_vlan_pop(struct sk_buff *skb) ++ * Description ++ * Pop a VLAN header from the packet associated to *skb*. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer.
Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) ++ * Description ++ * Get tunnel metadata. This helper takes a pointer *key* to an ++ * empty **struct bpf_tunnel_key** of **size**, that will be ++ * filled with tunnel metadata for the packet associated to *skb*. ++ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which ++ * indicates that the tunnel is based on IPv6 protocol instead of ++ * IPv4. ++ * ++ * The **struct bpf_tunnel_key** is an object that generalizes the ++ * principal parameters used by various tunneling protocols into a ++ * single struct. This way, it can be used to easily make a ++ * decision based on the contents of the encapsulation header, ++ * "summarized" in this struct. In particular, it holds the IP ++ * address of the remote end (IPv4 or IPv6, depending on the case) ++ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, ++ * this struct exposes the *key*\ **->tunnel_id**, which is ++ * generally mapped to a VNI (Virtual Network Identifier), making ++ * it programmable together with the **bpf_skb_set_tunnel_key**\ ++ * () helper. ++ * ++ * Let's imagine that the following code is part of a program ++ * attached to the TC ingress interface, on one end of a GRE ++ * tunnel, and is supposed to filter out all messages coming from ++ * remote ends with IPv4 address other than 10.0.0.1: ++ * ++ * :: ++ * ++ * int ret; ++ * struct bpf_tunnel_key key = {}; ++ * ++ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); ++ * if (ret < 0) ++ * return TC_ACT_SHOT; // drop packet ++ * ++ * if (key.remote_ipv4 != 0x0a000001) ++ * return TC_ACT_SHOT; // drop packet ++ * ++ * return TC_ACT_OK; // accept packet ++ * ++ * This interface can also be used with all encapsulation devices ++ * that can operate in "collect metadata" mode: instead of having ++ * one network device per specific configuration, the "collect ++ * metadata" mode only requires a single device where the ++ * configuration can be extracted from this helper. ++ * ++ * This can be used together with various tunnels such as VXLan, ++ * Geneve, GRE or IP in IP (IPIP). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) ++ * Description ++ * Populate tunnel metadata for packet associated to *skb.* The ++ * tunnel metadata is set to the contents of *key*, of *size*. The ++ * *flags* can be set to a combination of the following values: ++ * ++ * **BPF_F_TUNINFO_IPV6** ++ * Indicate that the tunnel is based on IPv6 protocol ++ * instead of IPv4. ++ * **BPF_F_ZERO_CSUM_TX** ++ * For IPv4 packets, add a flag to tunnel metadata ++ * indicating that checksum computation should be skipped ++ * and checksum set to zeroes. ++ * **BPF_F_DONT_FRAGMENT** ++ * Add a flag to tunnel metadata indicating that the ++ * packet should not be fragmented. ++ * **BPF_F_SEQ_NUMBER** ++ * Add a flag to tunnel metadata indicating that a ++ * sequence number should be added to tunnel header before ++ * sending the packet. This flag was added for GRE ++ * encapsulation, but might be used with other protocols ++ * as well in the future. 
++ * ++ * Here is a typical usage on the transmit path: ++ * ++ * :: ++ * ++ * struct bpf_tunnel_key key; ++ * populate key ... ++ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); ++ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); ++ * ++ * See also the description of the **bpf_skb_get_tunnel_key**\ () ++ * helper for additional information. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags) ++ * Description ++ * Read the value of a perf event counter. This helper relies on a ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of ++ * the perf event counter is selected when *map* is updated with ++ * perf event file descriptors. The *map* is an array whose size ++ * is the number of available CPUs, and each cell contains a value ++ * relative to one CPU. The value to retrieve is indicated by ++ * *flags*, that contains the index of the CPU to look up, masked ++ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to ++ * **BPF_F_CURRENT_CPU** to indicate that the value for the ++ * current CPU should be retrieved. ++ * ++ * Note that before Linux 4.13, only hardware perf events can be ++ * retrieved. ++ * ++ * Also, be aware that the newer helper ++ * **bpf_perf_event_read_value**\ () is recommended over ++ * **bpf_perf_event_read**\ () in general. The latter has some ABI ++ * quirks where error and counter value are used as a return code ++ * (which is wrong to do since ranges may overlap). This issue is ++ * fixed with **bpf_perf_event_read_value**\ (), which at the same ++ * time provides more features over the **bpf_perf_event_read**\ ++ * () interface. Please refer to the description of ++ * **bpf_perf_event_read_value**\ () for details. ++ * Return ++ * The value of the perf event counter read from the map, or a ++ * negative error code in case of failure. ++ * ++ * int bpf_redirect(u32 ifindex, u64 flags) ++ * Description ++ * Redirect the packet to another net device of index *ifindex*. ++ * This helper is somewhat similar to **bpf_clone_redirect**\ ++ * (), except that the packet is not cloned, which provides ++ * increased performance. ++ * ++ * Except for XDP, both ingress and egress interfaces can be used ++ * for redirection. The **BPF_F_INGRESS** value in *flags* is used ++ * to make the distinction (ingress path is selected if the flag ++ * is present, egress path otherwise). Currently, XDP only ++ * supports redirection to the egress interface, and accepts no ++ * flag at all. ++ * ++ * The same effect can be attained with the more generic ++ * **bpf_redirect_map**\ (), which requires specific maps to be ++ * used but offers better performance. ++ * Return ++ * For XDP, the helper returns **XDP_REDIRECT** on success or ++ * **XDP_ABORTED** on error. For other program types, the values ++ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on ++ * error. ++ * ++ * u32 bpf_get_route_realm(struct sk_buff *skb) ++ * Description ++ * Retrieve the realm of the route, that is to say the ++ * **tclassid** field of the destination for the *skb*. The ++ * identifier retrieved is a user-provided tag, similar to the ++ * one used with the net_cls cgroup (see description for ++ * **bpf_get_cgroup_classid**\ () helper), but here this tag is ++ * held by a route (a destination entry), not by a task.
++ * ++ * Retrieving this identifier works with the clsact TC egress hook ++ * (see also **tc-bpf(8)**), or alternatively on conventional ++ * classful egress qdiscs, but not on TC ingress path. In case of ++ * clsact TC egress hook, this has the advantage that, internally, ++ * the destination entry has not been dropped yet in the transmit ++ * path. Therefore, the destination entry does not need to be ++ * artificially held via **netif_keep_dst**\ () for a classful ++ * qdisc until the *skb* is freed. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_IP_ROUTE_CLASSID** configuration option. ++ * Return ++ * The realm of the route for the packet associated to *skb*, or 0 ++ * if none was found. ++ * ++ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) ++ * Description ++ * Write raw *data* blob into a special BPF perf event held by ++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf ++ * event must have the following attributes: **PERF_SAMPLE_RAW** ++ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and ++ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. ++ * ++ * The *flags* are used to indicate the index in *map* for which ++ * the value must be put, masked with **BPF_F_INDEX_MASK**. ++ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** ++ * to indicate that the index of the current CPU core should be ++ * used. ++ * ++ * The value to write, of *size*, is passed through eBPF stack and ++ * pointed by *data*. ++ * ++ * The context of the program *ctx* also needs to be passed to the ++ * helper. ++ * ++ * On user space, a program willing to read the values needs to ++ * call **perf_event_open**\ () on the perf event (either for ++ * one or for all CPUs) and to store the file descriptor into the ++ * *map*. This must be done before the eBPF program can send data ++ * into it. An example is available in file ++ * *samples/bpf/trace_output_user.c* in the Linux kernel source ++ * tree (the eBPF program counterpart is in ++ * *samples/bpf/trace_output_kern.c*). ++ * ++ * **bpf_perf_event_output**\ () achieves better performance ++ * than **bpf_trace_printk**\ () for sharing data with user ++ * space, and is much better suited for streaming data from eBPF ++ * programs. ++ * ++ * Note that this helper is not restricted to tracing use cases ++ * and can be used with programs attached to TC or XDP as well, ++ * where it allows for passing data to user space listeners. Data ++ * can be: ++ * ++ * * Only custom structs, ++ * * Only the packet payload, or ++ * * A combination of both. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) ++ * Description ++ * This helper was provided as an easy way to load data from a ++ * packet. It can be used to load *len* bytes from *offset* from ++ * the packet associated to *skb*, into the buffer pointed by ++ * *to*. ++ * ++ * Since Linux 4.7, usage of this helper has mostly been replaced ++ * by "direct packet access", enabling packet data to be ++ * manipulated with *skb*\ **->data** and *skb*\ **->data_end** ++ * pointing respectively to the first byte of packet data and to ++ * the byte after the last byte of packet data. However, it ++ * remains useful if one wishes to read large quantities of data ++ * at once from a packet into the eBPF stack. ++ * Return ++ * 0 on success, or a negative error in case of failure.
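To make the **bpf_perf_event_output**\ () flow above concrete, the following is a hedged sketch of a TC classifier pushing a small custom struct into a **BPF_MAP_TYPE_PERF_EVENT_ARRAY**; the *events* map, the *struct event* layout and the samples/bpf-style map-definition macro are illustrative assumptions, not part of this patch. ::

    /* Illustrative only: "events" and "struct event" are assumptions,
     * and the map definition follows the samples/bpf convention. */
    struct event {
            __u32 ifindex;
            __u32 len;
    };

    struct bpf_map_def SEC("maps") events = {
            .type        = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
            .key_size    = sizeof(int),
            .value_size  = sizeof(__u32),
            .max_entries = 64, /* >= number of possible CPUs */
    };

    SEC("classifier")
    int emit_event(struct __sk_buff *skb)
    {
            struct event ev = {
                    .ifindex = skb->ifindex,
                    .len     = skb->len,
            };

            /* Index selected with BPF_F_CURRENT_CPU, as described above. */
            bpf_perf_event_output(skb, &events, BPF_F_CURRENT_CPU,
                                  &ev, sizeof(ev));
            return TC_ACT_OK;
    }

User space would then open one perf event per CPU with perf_event_open(), store the file descriptors in *events*, and read the samples, as described in the text above.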
++ * ++ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags) ++ * Description ++ * Walk a user or a kernel stack and return its id. To achieve ++ * this, the helper needs *ctx*, which is a pointer to the context ++ * on which the tracing program is executed, and a pointer to a ++ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * a combination of the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. ++ * **BPF_F_FAST_STACK_CMP** ++ * Compare stacks by hash only. ++ * **BPF_F_REUSE_STACKID** ++ * If two different stacks hash into the same *stackid*, ++ * discard the old one. ++ * ++ * The stack id retrieved is a 32 bit long integer handle which ++ * can be further combined with other data (including other stack ++ * ids) and used as a key into maps. This can be useful for ++ * generating a variety of graphs (such as flame graphs or off-cpu ++ * graphs). ++ * ++ * For walking a stack, this helper is an improvement over ++ * **bpf_probe_read**\ (), which can be used with unrolled loops ++ * but is not efficient and consumes a lot of eBPF instructions. ++ * Instead, **bpf_get_stackid**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack=<new value> ++ * Return ++ * The positive or null stack id on success, or a negative error ++ * in case of failure. ++ * ++ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed) ++ * Description ++ * Compute a checksum difference, from the raw buffer pointed by ++ * *from*, of length *from_size* (that must be a multiple of 4), ++ * towards the raw buffer pointed by *to*, of size *to_size* ++ * (same remark). An optional *seed* can be added to the value ++ * (this can be cascaded, the seed may come from a previous call ++ * to the helper). ++ * ++ * This is flexible enough to be used in several ways: ++ * ++ * * With *from_size* == 0, *to_size* > 0 and *seed* set to ++ * checksum, it can be used when pushing new data. ++ * * With *from_size* > 0, *to_size* == 0 and *seed* set to ++ * checksum, it can be used when removing data from a packet. ++ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it ++ * can be used to compute a diff. Note that *from_size* and ++ * *to_size* do not need to be equal. ++ * ++ * This helper can be used in combination with ++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to ++ * which one can feed in the difference computed with ++ * **bpf_csum_diff**\ (). ++ * Return ++ * The checksum result, or a negative error code in case of ++ * failure. ++ * ++ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) ++ * Description ++ * Retrieve tunnel options metadata for the packet associated to ++ * *skb*, and store the raw tunnel option data to the buffer *opt* ++ * of *size*. ++ * ++ * This helper can be used with encapsulation devices that can ++ * operate in "collect metadata" mode (please refer to the related ++ * note in the description of **bpf_skb_get_tunnel_key**\ () for ++ * more details).
A particular example where this can be used is ++ * in combination with the Geneve encapsulation protocol, where it ++ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) ++ * and retrieving arbitrary TLVs (Type-Length-Value headers) from ++ * the eBPF program. This allows for full customization of these ++ * headers. ++ * Return ++ * The size of the option data retrieved. ++ * ++ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) ++ * Description ++ * Set tunnel options metadata for the packet associated to *skb* ++ * to the option data contained in the raw buffer *opt* of *size*. ++ * ++ * See also the description of the **bpf_skb_get_tunnel_opt**\ () ++ * helper for additional information. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) ++ * Description ++ * Change the protocol of the *skb* to *proto*. Currently ++ * supported are transition from IPv4 to IPv6, and from IPv6 to ++ * IPv4. The helper takes care of the groundwork for the ++ * transition, including resizing the socket buffer. The eBPF ++ * program is expected to fill the new headers, if any, via ++ * **skb_store_bytes**\ () and to recompute the checksums with ++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ ++ * (). The main case for this helper is to perform NAT64 ++ * operations out of an eBPF program. ++ * ++ * Internally, the GSO type is marked as dodgy so that headers are ++ * checked and segments are recalculated by the GSO/GRO engine. ++ * The size for GSO target is adapted as well. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_change_type(struct sk_buff *skb, u32 type) ++ * Description ++ * Change the packet type for the packet associated to *skb*. This ++ * comes down to setting *skb*\ **->pkt_type** to *type*, except ++ * the eBPF program does not have a write access to *skb*\ ++ * **->pkt_type** beside this helper. Using a helper here allows ++ * for graceful handling of errors. ++ * ++ * The major use case is to change incoming *skb*s to ++ * **PACKET_HOST** in a programmatic way instead of having to ++ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for ++ * example. ++ * ++ * Note that *type* only allows certain values. At this time, they ++ * are: ++ * ++ * **PACKET_HOST** ++ * Packet is for us. ++ * **PACKET_BROADCAST** ++ * Send packet to all. ++ * **PACKET_MULTICAST** ++ * Send packet to group. ++ * **PACKET_OTHERHOST** ++ * Send packet to someone else. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) ++ * Description ++ * Check whether *skb* is a descendant of the cgroup2 held by ++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. ++ * Return ++ * The return value depends on the result of the test, and can be: ++ * ++ * * 0, if the *skb* failed the cgroup2 descendant test. ++ * * 1, if the *skb* succeeded the cgroup2 descendant test. ++ * * A negative error code, if an error occurred. 
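As a small, hedged companion to the **bpf_skb_under_cgroup**\ () entry above, the sketch below shows how a TC classifier might drop traffic from sockets outside a given cgroup2 subtree; *cgroup_map* is an assumed single-slot **BPF_MAP_TYPE_CGROUP_ARRAY** populated from user space with a cgroup2 directory file descriptor. ::

    /* Sketch only: "cgroup_map" is an assumption, filled from user space. */
    struct bpf_map_def SEC("maps") cgroup_map = {
            .type        = BPF_MAP_TYPE_CGROUP_ARRAY,
            .key_size    = sizeof(__u32),
            .value_size  = sizeof(__u32),
            .max_entries = 1,
    };

    SEC("classifier")
    int allow_only_cgroup(struct __sk_buff *skb)
    {
            /* 1: descendant, 0: not a descendant, < 0: error. */
            if (bpf_skb_under_cgroup(skb, &cgroup_map, 0) != 1)
                    return TC_ACT_SHOT;
            return TC_ACT_OK;
    }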
++ * ++ * u32 bpf_get_hash_recalc(struct sk_buff *skb) ++ * Description ++ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is ++ * not set, in particular if the hash was cleared due to mangling, ++ * recompute this hash. Later accesses to the hash can be done ++ * directly with *skb*\ **->hash**. ++ * ++ * Calling **bpf_set_hash_invalid**\ (), changing a packet ++ * prototype with **bpf_skb_change_proto**\ (), or calling ++ * **bpf_skb_store_bytes**\ () with the ++ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear ++ * the hash and to trigger a new computation for the next call to ++ * **bpf_get_hash_recalc**\ (). ++ * Return ++ * The 32-bit hash. ++ * ++ * u64 bpf_get_current_task(void) ++ * Return ++ * A pointer to the current task struct. ++ * ++ * int bpf_probe_write_user(void *dst, const void *src, u32 len) ++ * Description ++ * Attempt in a safe way to write *len* bytes from the buffer ++ * *src* to *dst* in memory. It only works for threads that are in ++ * user context, and *dst* must be a valid user space address. ++ * ++ * This helper should not be used to implement any kind of ++ * security mechanism because of TOC-TOU attacks, but rather to ++ * debug, divert, and manipulate execution of semi-cooperative ++ * processes. ++ * ++ * Keep in mind that this feature is meant for experiments, and it ++ * has a risk of crashing the system and running programs. ++ * Therefore, when an eBPF program using this helper is attached, ++ * a warning including PID and process name is printed to kernel ++ * logs. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) ++ * Description ++ * Check whether the probe is being run in the context of a given ++ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by ++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. ++ * Return ++ * The return value depends on the result of the test, and can be: ++ * ++ * * 0, if current task belongs to the cgroup2. ++ * * 1, if current task does not belong to the cgroup2. ++ * * A negative error code, if an error occurred. ++ * ++ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) ++ * Description ++ * Resize (trim or grow) the packet associated to *skb* to the ++ * new *len*. The *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * The basic idea is that the helper performs the needed work to ++ * change the size of the packet, then the eBPF program rewrites ++ * the rest via helpers like **bpf_skb_store_bytes**\ (), ++ * **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ () ++ * and others. This helper is a slow path utility intended for ++ * replies with control messages. And because it is targeted for ++ * slow path, the helper itself can afford to be slow: it ++ * implicitly linearizes, unclones and drops offloads from the ++ * *skb*. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) ++ * Description ++ * Pull in non-linear data in case the *skb* is non-linear and not ++ * all of *len* are part of the linear section.
Make *len* bytes ++ * from *skb* readable and writable. If a zero value is passed for ++ * *len*, then the whole length of the *skb* is pulled. ++ * ++ * This helper is only needed for reading and writing with direct ++ * packet access. ++ * ++ * For direct packet access, testing that offsets to access ++ * are within packet boundaries (test on *skb*\ **->data_end**) is ++ * susceptible to fail if offsets are invalid, or if the requested ++ * data is in non-linear parts of the *skb*. On failure the ++ * program can just bail out, or in the case of a non-linear ++ * buffer, use a helper to make the data available. The ++ * **bpf_skb_load_bytes**\ () helper is a first solution to access ++ * the data. Another one consists in using **bpf_skb_pull_data** ++ * to pull in once the non-linear parts, then retesting and ++ * eventually access the data. ++ * ++ * At the same time, this also makes sure the *skb* is uncloned, ++ * which is a necessary condition for direct write. As this needs ++ * to be an invariant for the write part only, the verifier ++ * detects writes and adds a prologue that is calling ++ * **bpf_skb_pull_data()** to effectively unclone the *skb* from ++ * the very beginning in case it is indeed cloned. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum) ++ * Description ++ * Add the checksum *csum* into *skb*\ **->csum** in case the ++ * driver has supplied a checksum for the entire packet into that ++ * field. Return an error otherwise. This helper is intended to be ++ * used in combination with **bpf_csum_diff**\ (), in particular ++ * when the checksum needs to be updated after data has been ++ * written into the packet through direct packet access. ++ * Return ++ * The checksum on success, or a negative error code in case of ++ * failure. ++ * ++ * void bpf_set_hash_invalid(struct sk_buff *skb) ++ * Description ++ * Invalidate the current *skb*\ **->hash**. It can be used after ++ * mangling on headers through direct packet access, in order to ++ * indicate that the hash is outdated and to trigger a ++ * recalculation the next time the kernel tries to access this ++ * hash or when the **bpf_get_hash_recalc**\ () helper is called. ++ * ++ * int bpf_get_numa_node_id(void) ++ * Description ++ * Return the id of the current NUMA node. The primary use case ++ * for this helper is the selection of sockets for the local NUMA ++ * node, when the program is attached to sockets using the ++ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), ++ * but the helper is also available to other eBPF program types, ++ * similarly to **bpf_get_smp_processor_id**\ (). ++ * Return ++ * The id of current NUMA node. ++ * ++ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) ++ * Description ++ * Grows headroom of packet associated to *skb* and adjusts the ++ * offset of the MAC header accordingly, adding *len* bytes of ++ * space. It automatically extends and reallocates memory as ++ * required. ++ * ++ * This helper can be used on a layer 3 *skb* to push a MAC header ++ * for redirection into a layer 2 device. 
++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) ++ * Description ++ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that ++ * it is possible to use a negative value for *delta*. This helper ++ * can be used to prepare the packet for pushing or popping ++ * headers. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) ++ * Description ++ * Copy a NUL terminated string from an unsafe address ++ * *unsafe_ptr* to *dst*. The *size* should include the ++ * terminating NUL byte. In case the string length is smaller than ++ * *size*, the target is not padded with further NUL bytes. If the ++ * string length is larger than *size*, just *size*-1 bytes are ++ * copied and the last byte is set to NUL. ++ * ++ * On success, the length of the copied string is returned. This ++ * makes this helper useful in tracing programs for reading ++ * strings, and more importantly to get its length at runtime. See ++ * the following snippet: ++ * ++ * :: ++ * ++ * SEC("kprobe/sys_open") ++ * void bpf_sys_open(struct pt_regs *ctx) ++ * { ++ * char buf[PATHLEN]; // PATHLEN is defined to 256 ++ * int res = bpf_probe_read_str(buf, sizeof(buf), ++ * ctx->di); ++ * ++ * // Consume buf, for example push it to ++ * // userspace via bpf_perf_event_output(); we ++ * // can use res (the string length) as event ++ * // size, after checking its boundaries. ++ * } ++ * ++ * In comparison, using **bpf_probe_read()** helper here instead ++ * to read the string would require to estimate the length at ++ * compile time, and would often result in copying more memory ++ * than necessary. ++ * ++ * Another useful use case is when parsing individual process ++ * arguments or individual environment variables navigating ++ * *current*\ **->mm->arg_start** and *current*\ ++ * **->mm->env_start**: using this helper and the return value, ++ * one can quickly iterate at the right offset of the memory area. ++ * Return ++ * On success, the strictly positive length of the string, ++ * including the trailing NUL character. On error, a negative ++ * value. ++ * ++ * u64 bpf_get_socket_cookie(struct sk_buff *skb) ++ * Description ++ * If the **struct sk_buff** pointed by *skb* has a known socket, ++ * retrieve the cookie (generated by the kernel) of this socket. ++ * If no cookie has been set yet, generate a new cookie. Once ++ * generated, the socket cookie remains stable for the life of the ++ * socket. This helper can be useful for monitoring per socket ++ * networking traffic statistics as it provides a global socket ++ * identifier that can be assumed unique. 
++ * Return ++ * An 8-byte long non-decreasing number on success, or 0 if the ++ * socket field is missing inside *skb*. ++ * ++ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) ++ * Description ++ * Equivalent to bpf_get_socket_cookie() helper that accepts ++ * *skb*, but gets socket from **struct bpf_sock_addr** context. ++ * Return ++ * An 8-byte long non-decreasing number. ++ * ++ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) ++ * Description ++ * Equivalent to bpf_get_socket_cookie() helper that accepts ++ * *skb*, but gets socket from **struct bpf_sock_ops** context. ++ * Return ++ * An 8-byte long non-decreasing number. ++ * ++ * u32 bpf_get_socket_uid(struct sk_buff *skb) ++ * Return ++ * The owner UID of the socket associated to *skb*. If the socket ++ * is **NULL**, or if it is not a full socket (i.e. if it is a ++ * time-wait or a request socket instead), **overflowuid** value ++ * is returned (note that **overflowuid** might also be the actual ++ * UID value for the socket). ++ * ++ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) ++ * Description ++ * Set the full hash for *skb* (set the field *skb*\ **->hash**) ++ * to value *hash*. ++ * Return ++ * 0 ++ * ++ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) ++ * Description ++ * Emulate a call to **setsockopt()** on the socket associated to ++ * *bpf_socket*, which must be a full socket. The *level* at ++ * which the option resides and the name *optname* of the option ++ * must be specified, see **setsockopt(2)** for more information. ++ * The option value of length *optlen* is pointed by *optval*. ++ * ++ * This helper actually implements a subset of **setsockopt()**. ++ * It supports the following *level*\ s: ++ * ++ * * **SOL_SOCKET**, which supports the following *optname*\ s: ++ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, ++ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. ++ * * **IPPROTO_TCP**, which supports the following *optname*\ s: ++ * **TCP_CONGESTION**, **TCP_BPF_IW**, ++ * **TCP_BPF_SNDCWND_CLAMP**. ++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. ++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) ++ * Description ++ * Grow or shrink the room for data in the packet associated to ++ * *skb* by *len_diff*, and according to the selected *mode*. ++ * ++ * There are two supported modes at this time: ++ * ++ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer ++ * (room space is added or removed below the layer 2 header). ++ * ++ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer ++ * (room space is added or removed below the layer 3 header). ++ * ++ * The following flags are supported at this time: ++ * ++ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. ++ * Adjusting mss in this way is not allowed for datagrams. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, ++ * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: ++ * Any new space is reserved to hold a tunnel header. ++ * Configure skb offsets and other fields accordingly. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, ++ * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: ++ * Use with ENCAP_L3 flags to further specify the tunnel type. ++ * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): ++ * Use with ENCAP_L3/L4 flags to further specify the tunnel ++ * type; *len* is the length of the inner MAC header.
++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) ++ * Description ++ * Redirect the packet to the endpoint referenced by *map* at ++ * index *key*. Depending on its type, this *map* can contain ++ * references to net devices (for forwarding packets through other ++ * ports), or to CPUs (for redirecting XDP frames to another CPU; ++ * but this is only implemented for native XDP (with driver ++ * support) as of this writing). ++ * ++ * The lower two bits of *flags* are used as the return code if ++ * the map lookup fails. This is so that the return value can be ++ * one of the XDP program return codes up to XDP_TX, as chosen by ++ * the caller. Any higher bits in the *flags* argument must be ++ * unset. ++ * ++ * When used to redirect packets to net devices, this helper ++ * provides a high performance increase over **bpf_redirect**\ (). ++ * This is due to various implementation details of the underlying ++ * mechanisms, one of which is the fact that **bpf_redirect_map**\ ++ * () tries to send packet as a "bulk" to the device. ++ * Return ++ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. ++ * ++ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags) ++ * Description ++ * Redirect the packet to the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * Add an entry to, or update a *map* referencing sockets. The ++ * *skops* is used as a new value for the entry associated to ++ * *key*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * If the *map* has eBPF programs (parser and verdict), those will ++ * be inherited by the socket being added. If the socket is ++ * already attached to eBPF programs, this results in an error. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) ++ * Description ++ * Adjust the address pointed by *xdp_md*\ **->data_meta** by ++ * *delta* (which can be positive or negative). Note that this ++ * operation modifies the address stored in *xdp_md*\ **->data**, ++ * so the latter must be loaded only after the helper has been ++ * called. ++ * ++ * The use of *xdp_md*\ **->data_meta** is optional and programs ++ * are not required to use it. The rationale is that when the ++ * packet is processed with XDP (e.g. 
as DoS filter), it is ++ * possible to push further meta data along with it before passing ++ * to the stack, and to give the guarantee that an ingress eBPF ++ * program attached as a TC classifier on the same device can pick ++ * this up for further post-processing. Since TC works with socket ++ * buffers, it remains possible to set from XDP the **mark** or ++ * **priority** pointers, or other pointers for the socket buffer. ++ * Having this scratch space generic and programmable allows for ++ * more flexibility as the user is free to store whatever meta ++ * data they need. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) ++ * Description ++ * Read the value of a perf event counter, and store it into *buf* ++ * of size *buf_size*. This helper relies on a *map* of type ++ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event ++ * counter is selected when *map* is updated with perf event file ++ * descriptors. The *map* is an array whose size is the number of ++ * available CPUs, and each cell contains a value relative to one ++ * CPU. The value to retrieve is indicated by *flags*, that ++ * contains the index of the CPU to look up, masked with ++ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to ++ * **BPF_F_CURRENT_CPU** to indicate that the value for the ++ * current CPU should be retrieved. ++ * ++ * This helper behaves in a way close to ++ * **bpf_perf_event_read**\ () helper, save that instead of ++ * just returning the value observed, it fills the *buf* ++ * structure. This allows for additional data to be retrieved: in ++ * particular, the enabled and running times (in *buf*\ ++ * **->enabled** and *buf*\ **->running**, respectively) are ++ * copied. In general, **bpf_perf_event_read_value**\ () is ++ * recommended over **bpf_perf_event_read**\ (), which has some ++ * ABI issues and provides fewer functionalities. ++ * ++ * These values are interesting, because hardware PMU (Performance ++ * Monitoring Unit) counters are limited resources. When there are ++ * more PMU based perf events opened than available counters, ++ * kernel will multiplex these events so each event gets certain ++ * percentage (but not all) of the PMU time. In case that ++ * multiplexing happens, the number of samples or counter value ++ * will not reflect the case compared to when no multiplexing ++ * occurs. This makes comparison between different runs difficult. ++ * Typically, the counter value should be normalized before ++ * comparing to other experiments. The usual normalization is done ++ * as follows. ++ * ++ * :: ++ * ++ * normalized_counter = counter * t_enabled / t_running ++ * ++ * Where t_enabled is the time enabled for event and t_running is ++ * the time running for event since last normalization. The ++ * enabled and running times are accumulated since the perf event ++ * open. To achieve scaling factor between two invocations of an ++ * eBPF program, users can use CPU id as the key (which is ++ * typical for perf array usage model) to remember the previous ++ * value and do the calculation inside the eBPF program.
++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) ++ * Description ++ * For an eBPF program attached to a perf event, retrieve the ++ * value of the event counter associated to *ctx* and store it in ++ * the structure pointed by *buf* and of size *buf_size*. Enabled ++ * and running times are also stored in the structure (see ++ * description of helper **bpf_perf_event_read_value**\ () for ++ * more details). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) ++ * Description ++ * Emulate a call to **getsockopt()** on the socket associated to ++ * *bpf_socket*, which must be a full socket. The *level* at ++ * which the option resides and the name *optname* of the option ++ * must be specified, see **getsockopt(2)** for more information. ++ * The retrieved value is stored in the structure pointed by ++ * *optval* and of length *optlen*. ++ * ++ * This helper actually implements a subset of **getsockopt()**. ++ * It supports the following *level*\ s: ++ * ++ * * **IPPROTO_TCP**, which supports *optname* ++ * **TCP_CONGESTION**. ++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. ++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_override_return(struct pt_regs *regs, u64 rc) ++ * Description ++ * Used for error injection, this helper uses kprobes to override ++ * the return value of the probed function, and to set it to *rc*. ++ * The first argument is the context *regs* on which the kprobe ++ * works. ++ * ++ * This helper works by setting the PC (program counter) ++ * to an override function which is run in place of the original ++ * probed function. This means the probed function is not run at ++ * all. The replacement function just returns with the required ++ * value. ++ * ++ * This helper has security implications, and thus is subject to ++ * restrictions. It is only available if the kernel was compiled ++ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration ++ * option, and in this case it only works on functions tagged with ++ * **ALLOW_ERROR_INJECTION** in the kernel code. ++ * ++ * Also, the helper is only available for the architectures having ++ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, ++ * x86 architecture is the only one to support this feature. ++ * Return ++ * 0 ++ * ++ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) ++ * Description ++ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field ++ * for the full TCP socket associated to *bpf_sock_ops* to ++ * *argval*. ++ * ++ * The primary use of this field is to determine if there should ++ * be calls to eBPF programs of type ++ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP ++ * code. A program of the same type can change its value, per ++ * connection and as necessary, when the connection is ++ * established. This field is directly accessible for reading, but ++ * this helper must be used for updates in order to return an ++ * error if an eBPF program tries to set a callback that is not ++ * supported in the current kernel.
++ * ++ * *argval* is a flag array which can combine these flags: ++ * ++ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) ++ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) ++ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) ++ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) ++ * ++ * Therefore, this function can be used to clear a callback flag by ++ * setting the appropriate bit to zero. e.g. to disable the RTO ++ * callback: ++ * ++ * **bpf_sock_ops_cb_flags_set(bpf_sock,** ++ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** ++ * ++ * Here are some examples of where one could call such eBPF ++ * program: ++ * ++ * * When RTO fires. ++ * * When a packet is retransmitted. ++ * * When the connection terminates. ++ * * When a packet is sent. ++ * * When a packet is received. ++ * Return ++ * Code **-EINVAL** if the socket is not a full TCP socket; ++ * otherwise, a positive number containing the bits that could not ++ * be set is returned (which comes down to 0 if all bits were set ++ * as required). ++ * ++ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) ++ * Description ++ * This helper is used in programs implementing policies at the ++ * socket level. If the message *msg* is allowed to pass (i.e. if ++ * the verdict eBPF program returns **SK_PASS**), redirect it to ++ * the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) ++ * Description ++ * For socket policies, apply the verdict of the eBPF program to ++ * the next *bytes* (number of bytes) of message *msg*. ++ * ++ * For example, this helper can be used in the following cases: ++ * ++ * * A single **sendmsg**\ () or **sendfile**\ () system call ++ * contains multiple logical messages that the eBPF program is ++ * supposed to read and for which it should apply a verdict. ++ * * An eBPF program only cares to read the first *bytes* of a ++ * *msg*. If the message has a large payload, then setting up ++ * and calling the eBPF program repeatedly for all bytes, even ++ * though the verdict is already known, would create unnecessary ++ * overhead. ++ * ++ * When called from within an eBPF program, the helper sets a ++ * counter internal to the BPF infrastructure, that is used to ++ * apply the last verdict to the next *bytes*. If *bytes* is ++ * smaller than the current data being processed from a ++ * **sendmsg**\ () or **sendfile**\ () system call, the first ++ * *bytes* will be sent and the eBPF program will be re-run with ++ * the pointer for start of data pointing to byte number *bytes* ++ * **+ 1**. If *bytes* is larger than the current data being ++ * processed, then the eBPF verdict will be applied to multiple ++ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are ++ * consumed. ++ * ++ * Note that if a socket closes with the internal counter holding ++ * a non-zero value, this is not a problem because data is not ++ * being buffered for *bytes* and is sent as it is received. 
++ * Return ++ * 0 ++ * ++ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) ++ * Description ++ * For socket policies, prevent the execution of the verdict eBPF ++ * program for message *msg* until *bytes* (byte number) have been ++ * accumulated. ++ * ++ * This can be used when one needs a specific number of bytes ++ * before a verdict can be assigned, even if the data spans ++ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme ++ * case would be a user calling **sendmsg**\ () repeatedly with ++ * 1-byte long message segments. Obviously, this is bad for ++ * performance, but it is still valid. If the eBPF program needs ++ * *bytes* bytes to validate a header, this helper can be used to ++ * prevent the eBPF program to be called again until *bytes* have ++ * been accumulated. ++ * Return ++ * 0 ++ * ++ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) ++ * Description ++ * For socket policies, pull in non-linear data from user space ++ * for *msg* and set pointers *msg*\ **->data** and *msg*\ ++ * **->data_end** to *start* and *end* bytes offsets into *msg*, ++ * respectively. ++ * ++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a ++ * *msg* it can only parse data that the (**data**, **data_end**) ++ * pointers have already consumed. For **sendmsg**\ () hooks this ++ * is likely the first scatterlist element. But for calls relying ++ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will ++ * be the range (**0**, **0**) because the data is shared with ++ * user space and by default the objective is to avoid allowing ++ * user space to modify data while (or after) eBPF verdict is ++ * being decided. This helper can be used to pull in data and to ++ * set the start and end pointer to given values. Data will be ++ * copied if necessary (i.e. if data was not linear and if start ++ * and end pointers do not point to the same chunk). ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) ++ * Description ++ * Bind the socket associated to *ctx* to the address pointed by ++ * *addr*, of length *addr_len*. This allows for making outgoing ++ * connection from the desired IP address, which can be useful for ++ * example when all processes inside a cgroup should use one ++ * single IP address on a host that has multiple IP configured. ++ * ++ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The ++ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or ++ * **AF_INET6**). Looking for a free port to bind to can be ++ * expensive, therefore binding to port is not permitted by the ++ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) ++ * must be set to zero. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) ++ * Description ++ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is ++ * only possible to shrink the packet as of this writing, ++ * therefore *delta* must be a negative integer. 
++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) ++ * Description ++ * Retrieve the XFRM state (IP transform framework, see also ++ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. ++ * ++ * The retrieved value is stored in the **struct bpf_xfrm_state** ++ * pointed by *xfrm_state* and of length *size*. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_XFRM** configuration option. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) ++ * Description ++ * Return a user or a kernel stack in bpf program provided buffer. ++ * To achieve this, the helper needs *ctx*, which is a pointer ++ * to the context on which the tracing program is executed. ++ * To store the stacktrace, the bpf program provides *buf* with ++ * a nonnegative *size*. ++ * ++ * The last argument, *flags*, holds the number of stack frames to ++ * skip (from 0 to 255), masked with ++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set ++ * the following flags: ++ * ++ * **BPF_F_USER_STACK** ++ * Collect a user space stack instead of a kernel stack. ++ * **BPF_F_USER_BUILD_ID** ++ * Collect buildid+offset instead of ips for user stack, ++ * only valid if **BPF_F_USER_STACK** is also specified. ++ * ++ * **bpf_get_stack**\ () can collect up to ++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject ++ * to a sufficiently large buffer size. Note that ++ * this limit can be controlled with the **sysctl** program, and ++ * that it should be manually increased in order to profile long ++ * user stacks (such as stacks for Java programs). To do so, use: ++ * ++ * :: ++ * ++ * # sysctl kernel.perf_event_max_stack=<new value> ++ * Return ++ * A non-negative value equal to or less than *size* on success, ++ * or a negative error in case of failure. ++ * ++ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) ++ * Description ++ * This helper is similar to **bpf_skb_load_bytes**\ () in that ++ * it provides an easy way to load *len* bytes from *offset* ++ * from the packet associated to *skb*, into the buffer pointed ++ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that ++ * a fifth argument *start_header* exists in order to select a ++ * base offset to start from. *start_header* can be one of: ++ * ++ * **BPF_HDR_START_MAC** ++ * Base offset to load data from is *skb*'s mac header. ++ * **BPF_HDR_START_NET** ++ * Base offset to load data from is *skb*'s network header. ++ * ++ * In general, "direct packet access" is the preferred method to ++ * access packet data, however, this helper is in particular useful ++ * in socket filters where *skb*\ **->data** does not always point ++ * to the start of the mac header and where "direct packet access" ++ * is not available. ++ * Return ++ * 0 on success, or a negative error in case of failure.
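As an illustration of **bpf_skb_load_bytes_relative**\ () with **BPF_HDR_START_NET**, here is a hedged sketch of a socket filter that reads the IPv4 header regardless of where *skb*\ **->data** points; the header definitions are the usual *linux/ip.h* ones, and the return convention (number of bytes to keep, 0 to drop) is that of **SO_ATTACH_BPF** socket filters. ::

    /* Hedged sketch: keep only TCP packets in an SO_ATTACH_BPF filter. */
    SEC("socket")
    int keep_tcp_only(struct __sk_buff *skb)
    {
            struct iphdr iph;

            if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
                                            BPF_HDR_START_NET) < 0)
                    return 0; /* cannot read the header: drop */

            return iph.protocol == IPPROTO_TCP ? skb->len : 0;
    }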
++ * ++ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) ++ * Description ++ * Do FIB lookup in kernel tables using parameters in *params*. ++ * If lookup is successful and result shows packet is to be ++ * forwarded, the neighbor tables are searched for the nexthop. ++ * If successful (ie., FIB lookup shows forwarding and nexthop ++ * is resolved), the nexthop address is returned in ipv4_dst ++ * or ipv6_dst based on family, smac is set to mac address of ++ * egress device, dmac is set to nexthop mac address, rt_metric ++ * is set to metric from route (IPv4/IPv6 only), and ifindex ++ * is set to the device index of the nexthop from the FIB lookup. ++ * ++ * *plen* argument is the size of the passed in struct. ++ * *flags* argument can be a combination of one or more of the ++ * following values: ++ * ++ * **BPF_FIB_LOOKUP_DIRECT** ++ * Do a direct table lookup vs full lookup using FIB ++ * rules. ++ * **BPF_FIB_LOOKUP_OUTPUT** ++ * Perform lookup from an egress perspective (default is ++ * ingress). ++ * ++ * *ctx* is either **struct xdp_md** for XDP programs or ++ * **struct sk_buff** tc cls_act programs. ++ * Return ++ * * < 0 if any input argument is invalid ++ * * 0 on success (packet is forwarded, nexthop neighbor exists) ++ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the ++ * packet is not forwarded or needs assist from full stack ++ * ++ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * Add an entry to, or update a sockhash *map* referencing sockets. ++ * The *skops* is used as a new value for the entry associated to ++ * *key*. *flags* is one of: ++ * ++ * **BPF_NOEXIST** ++ * The entry for *key* must not exist in the map. ++ * **BPF_EXIST** ++ * The entry for *key* must already exist in the map. ++ * **BPF_ANY** ++ * No condition on the existence of the entry for *key*. ++ * ++ * If the *map* has eBPF programs (parser and verdict), those will ++ * be inherited by the socket being added. If the socket is ++ * already attached to eBPF programs, this results in an error. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * This helper is used in programs implementing policies at the ++ * socket level. If the message *msg* is allowed to pass (i.e. if ++ * the verdict eBPF program returns **SK_PASS**), redirect it to ++ * the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and ++ * egress interfaces can be used for redirection. The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress path otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * This helper is used in programs implementing policies at the ++ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. ++ * if the verdeict eBPF program returns **SK_PASS**), redirect it ++ * to the socket referenced by *map* (of type ++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and ++ * egress interfaces can be used for redirection. 
The ++ * **BPF_F_INGRESS** value in *flags* is used to make the ++ * distinction (ingress path is selected if the flag is present, ++ * egress otherwise). This is the only flag supported for now. ++ * Return ++ * **SK_PASS** on success, or **SK_DROP** on error. ++ * ++ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) ++ * Description ++ * Encapsulate the packet associated to *skb* within a Layer 3 ++ * protocol header. This header is provided in the buffer at ++ * address *hdr*, with *len* its size in bytes. *type* indicates ++ * the protocol of the header and can be one of: ++ * ++ * **BPF_LWT_ENCAP_SEG6** ++ * IPv6 encapsulation with Segment Routing Header ++ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, ++ * the IPv6 header is computed by the kernel. ++ * **BPF_LWT_ENCAP_SEG6_INLINE** ++ * Only works if *skb* contains an IPv6 packet. Insert a ++ * Segment Routing Header (**struct ipv6_sr_hdr**) inside ++ * the IPv6 header. ++ * **BPF_LWT_ENCAP_IP** ++ * IP encapsulation (GRE/GUE/IPIP/etc). The outer header ++ * must be IPv4 or IPv6, followed by zero or more ++ * additional headers, up to **LWT_BPF_MAX_HEADROOM** ++ * total bytes in all prepended headers. Please note that ++ * if **skb_is_gso**\ (*skb*) is true, no more than two ++ * headers can be prepended, and the inner header, if ++ * present, should be either GRE or UDP/GUE. ++ * ++ * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs ++ * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can ++ * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and ++ * **BPF_PROG_TYPE_LWT_XMIT**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) ++ * Description ++ * Store *len* bytes from address *from* into the packet ++ * associated to *skb*, at *offset*. Only the flags, tag and TLVs ++ * inside the outermost IPv6 Segment Routing Header can be ++ * modified through this helper. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) ++ * Description ++ * Adjust the size allocated to TLVs in the outermost IPv6 ++ * Segment Routing Header contained in the packet associated to ++ * *skb*, at position *offset* by *delta* bytes. Only offsets ++ * after the segments are accepted. *delta* can be as well ++ * positive (growing) as negative (shrinking). ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. 
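As a concrete illustration of the bpf_fib_lookup() helper documented earlier in this hunk, the following XDP sketch (not part of the patch) asks the kernel FIB whether an incoming IPv4 packet would be forwarded and drops traffic the FIB marks as blackholed, unreachable or prohibited. The section name, the byte-swap shortcut (a little-endian build is assumed) and the minimal header parsing are assumptions; a real forwarder would also rewrite the MAC addresses from params.smac/dmac and redirect to params.ifindex.

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>

static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
			     int plen, __u32 flags) =
	(void *) BPF_FUNC_fib_lookup;

__attribute__((section("xdp"), used))
int xdp_fib_filter(struct xdp_md *ctx)
{
	void *data     = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	struct iphdr *iph  = data + sizeof(*eth);
	struct bpf_fib_lookup params = {};
	int rc;

	/* Bounds check required by the verifier before touching headers. */
	if ((void *)(iph + 1) > data_end)
		return XDP_PASS;
	if (eth->h_proto != __builtin_bswap16(ETH_P_IP))	/* htons(); LE build assumed */
		return XDP_PASS;

	params.family      = 2;					/* AF_INET */
	params.l4_protocol = iph->protocol;
	params.tos         = iph->tos;
	params.tot_len     = __builtin_bswap16(iph->tot_len);	/* ntohs(); LE build assumed */
	params.ipv4_src    = iph->saddr;
	params.ipv4_dst    = iph->daddr;
	params.ifindex     = ctx->ingress_ifindex;

	rc = bpf_fib_lookup(ctx, &params, sizeof(params), 0);

	/* rc < 0: bad arguments; rc == 0: forwardable and the neighbour is
	 * resolved (params.smac/dmac/ifindex are filled in); rc > 0: one of
	 * the BPF_FIB_LKUP_RET_* codes. Drop what the FIB says to discard.
	 */
	if (rc == BPF_FIB_LKUP_RET_BLACKHOLE ||
	    rc == BPF_FIB_LKUP_RET_UNREACHABLE ||
	    rc == BPF_FIB_LKUP_RET_PROHIBIT)
		return XDP_DROP;

	return XDP_PASS;
}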
++ * ++ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) ++ * Description ++ * Apply an IPv6 Segment Routing action of type *action* to the ++ * packet associated to *skb*. Each action takes a parameter ++ * contained at address *param*, and of length *param_len* bytes. ++ * *action* can be one of: ++ * ++ * **SEG6_LOCAL_ACTION_END_X** ++ * End.X action: Endpoint with Layer-3 cross-connect. ++ * Type of *param*: **struct in6_addr**. ++ * **SEG6_LOCAL_ACTION_END_T** ++ * End.T action: Endpoint with specific IPv6 table lookup. ++ * Type of *param*: **int**. ++ * **SEG6_LOCAL_ACTION_END_B6** ++ * End.B6 action: Endpoint bound to an SRv6 policy. ++ * Type of *param*: **struct ipv6_sr_hdr**. ++ * **SEG6_LOCAL_ACTION_END_B6_ENCAP** ++ * End.B6.Encap action: Endpoint bound to an SRv6 ++ * encapsulation policy. ++ * Type of *param*: **struct ipv6_sr_hdr**. ++ * ++ * A call to this helper is susceptible to change the underlying ++ * packet buffer. Therefore, at load time, all checks on pointers ++ * previously done by the verifier are invalidated and must be ++ * performed again, if the helper is used in combination with ++ * direct packet access. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_rc_repeat(void *ctx) ++ * Description ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded repeat key message. This delays ++ * the generation of a key up event for previously generated ++ * key down event. ++ * ++ * Some IR protocols like NEC have a special IR message for ++ * repeating last button, for when a button is held down. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". ++ * Return ++ * 0 ++ * ++ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) ++ * Description ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded key press with *scancode*, ++ * *toggle* value in the given *protocol*. The scancode will be ++ * translated to a keycode using the rc keymap, and reported as ++ * an input key down event. After a period a key up event is ++ * generated. This period can be extended by calling either ++ * **bpf_rc_keydown**\ () again with the same values, or calling ++ * **bpf_rc_repeat**\ (). ++ * ++ * Some protocols include a toggle bit, in case the button was ++ * released and pressed again between consecutive scancodes. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * The *protocol* is the decoded protocol number (see ++ * **enum rc_proto** for some predefined values). ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". ++ * Return ++ * 0 ++ * ++ * u64 bpf_skb_cgroup_id(struct sk_buff *skb) ++ * Description ++ * Return the cgroup v2 id of the socket associated with the *skb*. ++ * This is roughly similar to the **bpf_get_cgroup_classid**\ () ++ * helper for cgroup v1 by providing a tag resp. identifier that ++ * can be matched on or used for map lookups e.g. to implement ++ * policy. The cgroup v2 id of a given path in the hierarchy is ++ * exposed in user space through the f_handle API in order to get ++ * to the same 64-bit id. 
++ * ++ * This helper can be used on TC egress path, but not on ingress, ++ * and is available only if the kernel was compiled with the ++ * **CONFIG_SOCK_CGROUP_DATA** configuration option. ++ * Return ++ * The id is returned or 0 in case the id could not be retrieved. ++ * ++ * u64 bpf_get_current_cgroup_id(void) ++ * Return ++ * A 64-bit integer containing the current cgroup id based ++ * on the cgroup within which the current task is running. ++ * ++ * void *bpf_get_local_storage(void *map, u64 flags) ++ * Description ++ * Get the pointer to the local storage area. ++ * The type and the size of the local storage is defined ++ * by the *map* argument. ++ * The *flags* meaning is specific for each map type, ++ * and has to be 0 for cgroup local storage. ++ * ++ * Depending on the BPF program type, a local storage area ++ * can be shared between multiple instances of the BPF program, ++ * running simultaneously. ++ * ++ * A user should care about the synchronization by himself. ++ * For example, by using the **BPF_STX_XADD** instruction to alter ++ * the shared data. ++ * Return ++ * A pointer to the local storage area. ++ * ++ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) ++ * Description ++ * Select a **SO_REUSEPORT** socket from a ++ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. ++ * It checks the selected socket is matching the incoming ++ * request in the socket buffer. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) ++ * Description ++ * Return id of cgroup v2 that is ancestor of cgroup associated ++ * with the *skb* at the *ancestor_level*. The root cgroup is at ++ * *ancestor_level* zero and each step down the hierarchy ++ * increments the level. If *ancestor_level* == level of cgroup ++ * associated with *skb*, then return value will be same as that ++ * of **bpf_skb_cgroup_id**\ (). ++ * ++ * The helper is useful to implement policies based on cgroups ++ * that are upper in hierarchy than immediate cgroup associated ++ * with *skb*. ++ * ++ * The format of returned id and helper limitations are same as in ++ * **bpf_skb_cgroup_id**\ (). ++ * Return ++ * The id is returned or 0 in case the id could not be retrieved. ++ * ++ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) ++ * Description ++ * Look for TCP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). ++ * ++ * The *ctx* should point to the context of the program, such as ++ * the skb or socket (depending on the hook in use). This is used ++ * to determine the base network namespace for the lookup. ++ * ++ * *tuple_size* must be one of: ++ * ++ * **sizeof**\ (*tuple*\ **->ipv4**) ++ * Look for an IPv4 socket. ++ * **sizeof**\ (*tuple*\ **->ipv6**) ++ * Look for an IPv6 socket. ++ * ++ * If the *netns* is a negative signed 32-bit integer, then the ++ * socket lookup table in the netns associated with the *ctx* will ++ * will be used. For the TC hooks, this is the netns of the device ++ * in the skb. For socket hooks, this is the netns of the socket. ++ * If *netns* is any other signed 32-bit value greater than or ++ * equal to zero then it specifies the ID of the netns relative to ++ * the netns associated with the *ctx*. 
*netns* values beyond the ++ * range of 32-bit integers are reserved for future use. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * Return ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from *reuse*\ **->socks**\ [] using the hash of the ++ * tuple. ++ * ++ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) ++ * Description ++ * Look for UDP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). ++ * ++ * The *ctx* should point to the context of the program, such as ++ * the skb or socket (depending on the hook in use). This is used ++ * to determine the base network namespace for the lookup. ++ * ++ * *tuple_size* must be one of: ++ * ++ * **sizeof**\ (*tuple*\ **->ipv4**) ++ * Look for an IPv4 socket. ++ * **sizeof**\ (*tuple*\ **->ipv6**) ++ * Look for an IPv6 socket. ++ * ++ * If the *netns* is a negative signed 32-bit integer, then the ++ * socket lookup table in the netns associated with the *ctx* will ++ * will be used. For the TC hooks, this is the netns of the device ++ * in the skb. For socket hooks, this is the netns of the socket. ++ * If *netns* is any other signed 32-bit value greater than or ++ * equal to zero then it specifies the ID of the netns relative to ++ * the netns associated with the *ctx*. *netns* values beyond the ++ * range of 32-bit integers are reserved for future use. ++ * ++ * All values for *flags* are reserved for future usage, and must ++ * be left at zero. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * Return ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from *reuse*\ **->socks**\ [] using the hash of the ++ * tuple. ++ * ++ * int bpf_sk_release(struct bpf_sock *sock) ++ * Description ++ * Release the reference held by *sock*. *sock* must be a ++ * non-**NULL** pointer that was returned from ++ * **bpf_sk_lookup_xxx**\ (). ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) ++ * Description ++ * Push an element *value* in *map*. *flags* is one of: ++ * ++ * **BPF_EXIST** ++ * If the queue/stack is full, the oldest element is ++ * removed to make room for this. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_map_pop_elem(struct bpf_map *map, void *value) ++ * Description ++ * Pop an element from *map*. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_map_peek_elem(struct bpf_map *map, void *value) ++ * Description ++ * Get an element from *map* without removing it. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) ++ * Description ++ * For socket policies, insert *len* bytes into *msg* at offset ++ * *start*. ++ * ++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a ++ * *msg* it may want to insert metadata or options into the *msg*. 
++ * This can later be read and used by any of the lower layer BPF ++ * hooks. ++ * ++ * This helper may fail if under memory pressure (a malloc ++ * fails) in these cases BPF programs will get an appropriate ++ * error and BPF programs will need to handle them. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) ++ * Description ++ * Will remove *pop* bytes from a *msg* starting at byte *start*. ++ * This may result in **ENOMEM** errors under certain situations if ++ * an allocation and copy are required due to a full ring buffer. ++ * However, the helper will try to avoid doing the allocation ++ * if possible. Other errors can occur if input parameters are ++ * invalid either due to *start* byte not being valid part of *msg* ++ * payload and/or *pop* value being to large. ++ * Return ++ * 0 on success, or a negative error in case of failure. ++ * ++ * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) ++ * Description ++ * This helper is used in programs implementing IR decoding, to ++ * report a successfully decoded pointer movement. ++ * ++ * The *ctx* should point to the lirc sample as passed into ++ * the program. ++ * ++ * This helper is only available is the kernel was compiled with ++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to ++ * "**y**". ++ * Return ++ * 0 ++ * ++ * int bpf_spin_lock(struct bpf_spin_lock *lock) ++ * Description ++ * Acquire a spinlock represented by the pointer *lock*, which is ++ * stored as part of a value of a map. Taking the lock allows to ++ * safely update the rest of the fields in that value. The ++ * spinlock can (and must) later be released with a call to ++ * **bpf_spin_unlock**\ (\ *lock*\ ). ++ * ++ * Spinlocks in BPF programs come with a number of restrictions ++ * and constraints: ++ * ++ * * **bpf_spin_lock** objects are only allowed inside maps of ++ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this ++ * list could be extended in the future). ++ * * BTF description of the map is mandatory. ++ * * The BPF program can take ONE lock at a time, since taking two ++ * or more could cause dead locks. ++ * * Only one **struct bpf_spin_lock** is allowed per map element. ++ * * When the lock is taken, calls (either BPF to BPF or helpers) ++ * are not allowed. ++ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not ++ * allowed inside a spinlock-ed region. ++ * * The BPF program MUST call **bpf_spin_unlock**\ () to release ++ * the lock, on all execution paths, before it returns. ++ * * The BPF program can access **struct bpf_spin_lock** only via ++ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () ++ * helpers. Loading or storing data into the **struct ++ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. ++ * * To use the **bpf_spin_lock**\ () helper, the BTF description ++ * of the map value must be a struct and have **struct ++ * bpf_spin_lock** *anyname*\ **;** field at the top level. ++ * Nested lock inside another struct is not allowed. ++ * * The **struct bpf_spin_lock** *lock* field in a map value must ++ * be aligned on a multiple of 4 bytes in that value. ++ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy ++ * the **bpf_spin_lock** field to user space. ++ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from ++ * a BPF program, do not update the **bpf_spin_lock** field. 
++ * * **bpf_spin_lock** cannot be on the stack or inside a ++ * networking packet (it can only be inside of a map values). ++ * * **bpf_spin_lock** is available to root only. ++ * * Tracing programs and socket filter programs cannot use ++ * **bpf_spin_lock**\ () due to insufficient preemption checks ++ * (but this may change in the future). ++ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. ++ * Return ++ * 0 ++ * ++ * int bpf_spin_unlock(struct bpf_spin_lock *lock) ++ * Description ++ * Release the *lock* previously locked by a call to ++ * **bpf_spin_lock**\ (\ *lock*\ ). ++ * Return ++ * 0 ++ * ++ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) ++ * Description ++ * This helper gets a **struct bpf_sock** pointer such ++ * that all the fields in this **bpf_sock** can be accessed. ++ * Return ++ * A **struct bpf_sock** pointer on success, or **NULL** in ++ * case of failure. ++ * ++ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) ++ * Description ++ * This helper gets a **struct bpf_tcp_sock** pointer from a ++ * **struct bpf_sock** pointer. ++ * Return ++ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in ++ * case of failure. ++ * ++ * int bpf_skb_ecn_set_ce(struct sk_buf *skb) ++ * Description ++ * Set ECN (Explicit Congestion Notification) field of IP header ++ * to **CE** (Congestion Encountered) if current value is **ECT** ++ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 ++ * and IPv4. ++ * Return ++ * 1 if the **CE** flag is set (either by the current helper call ++ * or because it was already present), 0 if it is not set. ++ * ++ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) ++ * Description ++ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. ++ * **bpf_sk_release**\ () is unnecessary and not allowed. ++ * Return ++ * A **struct bpf_sock** pointer on success, or **NULL** in ++ * case of failure. ++ * ++ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) ++ * Description ++ * Look for TCP socket matching *tuple*, optionally in a child ++ * network namespace *netns*. The return value must be checked, ++ * and if non-**NULL**, released via **bpf_sk_release**\ (). ++ * ++ * This function is identical to **bpf_sk_lookup_tcp**\ (), except ++ * that it also returns timewait or request sockets. Use ++ * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the ++ * full structure. ++ * ++ * This helper is available only if the kernel was compiled with ++ * **CONFIG_NET** configuration option. ++ * Return ++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. ++ * For sockets with reuseport option, the **struct bpf_sock** ++ * result is from *reuse*\ **->socks**\ [] using the hash of the ++ * tuple. ++ * ++ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) ++ * Description ++ * Check whether *iph* and *th* contain a valid SYN cookie ACK for ++ * the listening socket in *sk*. ++ * ++ * *iph* points to the start of the IPv4 or IPv6 header, while ++ * *iph_len* contains **sizeof**\ (**struct iphdr**) or ++ * **sizeof**\ (**struct ip6hdr**). ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains **sizeof**\ (**struct tcphdr**). ++ * ++ * Return ++ * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative ++ * error otherwise. 
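The spin-lock restrictions listed above are easier to follow with an example. Below is a hedged sketch (not part of the patch) of a tc classifier that updates two related counters in an array-map value under bpf_spin_lock()/bpf_spin_unlock(). The legacy samples/bpf-style map definition and section names are assumptions, and a loadable program additionally needs BTF describing the map value so the verifier can locate the lock; only the program-side pattern is shown.

#include <linux/bpf.h>

static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
	(void *) BPF_FUNC_map_lookup_elem;
static int (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
	(void *) BPF_FUNC_spin_lock;
static int (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
	(void *) BPF_FUNC_spin_unlock;

/* Map value: the struct bpf_spin_lock member sits at the top level, as the
 * rules above require, and protects the two counters next to it.
 */
struct pkt_stats {
	struct bpf_spin_lock lock;
	__u64 packets;
	__u64 bytes;
};

/* Minimal legacy map definition as used by samples/bpf-era loaders. */
struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

struct bpf_map_def __attribute__((section("maps"), used)) stats_map = {
	.type        = BPF_MAP_TYPE_ARRAY,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(struct pkt_stats),
	.max_entries = 1,
};

__attribute__((section("classifier"), used))
int count_bytes(struct __sk_buff *skb)
{
	__u32 key = 0;
	struct pkt_stats *st = bpf_map_lookup_elem(&stats_map, &key);

	if (!st)
		return 0;		/* TC_ACT_OK */

	/* One lock held at a time, no helper calls inside the critical
	 * section, and the lock is released on the only path out.
	 */
	bpf_spin_lock(&st->lock);
	st->packets++;
	st->bytes += skb->len;
	bpf_spin_unlock(&st->lock);

	return 0;			/* TC_ACT_OK */
}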
++ * ++ * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) ++ * Description ++ * Get name of sysctl in /proc/sys/ and copy it into provided by ++ * program buffer *buf* of size *buf_len*. ++ * ++ * The buffer is always NUL terminated, unless it's zero-sized. ++ * ++ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is ++ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name ++ * only (e.g. "tcp_mem"). ++ * Return ++ * Number of character copied (not including the trailing NUL). ++ * ++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain ++ * truncated name in this case). ++ * ++ * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) ++ * Description ++ * Get current value of sysctl as it is presented in /proc/sys ++ * (incl. newline, etc), and copy it as a string into provided ++ * by program buffer *buf* of size *buf_len*. ++ * ++ * The whole value is copied, no matter what file position user ++ * space issued e.g. sys_read at. ++ * ++ * The buffer is always NUL terminated, unless it's zero-sized. ++ * Return ++ * Number of character copied (not including the trailing NUL). ++ * ++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain ++ * truncated name in this case). ++ * ++ * **-EINVAL** if current value was unavailable, e.g. because ++ * sysctl is uninitialized and read returns -EIO for it. ++ * ++ * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) ++ * Description ++ * Get new value being written by user space to sysctl (before ++ * the actual write happens) and copy it as a string into ++ * provided by program buffer *buf* of size *buf_len*. ++ * ++ * User space may write new value at file position > 0. ++ * ++ * The buffer is always NUL terminated, unless it's zero-sized. ++ * Return ++ * Number of character copied (not including the trailing NUL). ++ * ++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain ++ * truncated name in this case). ++ * ++ * **-EINVAL** if sysctl is being read. ++ * ++ * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) ++ * Description ++ * Override new value being written by user space to sysctl with ++ * value provided by program in buffer *buf* of size *buf_len*. ++ * ++ * *buf* should contain a string in same form as provided by user ++ * space on sysctl write. ++ * ++ * User space may write new value at file position > 0. To override ++ * the whole sysctl value file position should be set to zero. ++ * Return ++ * 0 on success. ++ * ++ * **-E2BIG** if the *buf_len* is too big. ++ * ++ * **-EINVAL** if sysctl is being read. ++ * ++ * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) ++ * Description ++ * Convert the initial part of the string from buffer *buf* of ++ * size *buf_len* to a long integer according to the given base ++ * and save the result in *res*. ++ * ++ * The string may begin with an arbitrary amount of white space ++ * (as determined by **isspace**\ (3)) followed by a single ++ * optional '**-**' sign. ++ * ++ * Five least significant bits of *flags* encode base, other bits ++ * are currently unused. ++ * ++ * Base must be either 8, 10, 16 or 0 to detect it automatically ++ * similar to user space **strtol**\ (3). ++ * Return ++ * Number of characters consumed on success. Must be positive but ++ * no more than *buf_len*. ++ * ++ * **-EINVAL** if no valid digits were found or unsupported base ++ * was provided. 
++ * ++ * **-ERANGE** if resulting value was out of range. ++ * ++ * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) ++ * Description ++ * Convert the initial part of the string from buffer *buf* of ++ * size *buf_len* to an unsigned long integer according to the ++ * given base and save the result in *res*. ++ * ++ * The string may begin with an arbitrary amount of white space ++ * (as determined by **isspace**\ (3)). ++ * ++ * Five least significant bits of *flags* encode base, other bits ++ * are currently unused. ++ * ++ * Base must be either 8, 10, 16 or 0 to detect it automatically ++ * similar to user space **strtoul**\ (3). ++ * Return ++ * Number of characters consumed on success. Must be positive but ++ * no more than *buf_len*. ++ * ++ * **-EINVAL** if no valid digits were found or unsupported base ++ * was provided. ++ * ++ * **-ERANGE** if resulting value was out of range. ++ * ++ * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) ++ * Description ++ * Get a bpf-local-storage from a *sk*. ++ * ++ * Logically, it could be thought of getting the value from ++ * a *map* with *sk* as the **key**. From this ++ * perspective, the usage is not much different from ++ * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this ++ * helper enforces the key must be a full socket and the map must ++ * be a **BPF_MAP_TYPE_SK_STORAGE** also. ++ * ++ * Underneath, the value is stored locally at *sk* instead of ++ * the *map*. The *map* is used as the bpf-local-storage ++ * "type". The bpf-local-storage "type" (i.e. the *map*) is ++ * searched against all bpf-local-storages residing at *sk*. ++ * ++ * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be ++ * used such that a new bpf-local-storage will be ++ * created if one does not exist. *value* can be used ++ * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify ++ * the initial value of a bpf-local-storage. If *value* is ++ * **NULL**, the new bpf-local-storage will be zero initialized. ++ * Return ++ * A bpf-local-storage pointer is returned on success. ++ * ++ * **NULL** if not found or there was an error in adding ++ * a new bpf-local-storage. ++ * ++ * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) ++ * Description ++ * Delete a bpf-local-storage from a *sk*. ++ * Return ++ * 0 on success. ++ * ++ * **-ENOENT** if the bpf-local-storage cannot be found. ++ * ++ * int bpf_send_signal(u32 sig) ++ * Description ++ * Send signal *sig* to the current task. ++ * Return ++ * 0 on success or successfully queued. ++ * ++ * **-EBUSY** if work queue under nmi is full. ++ * ++ * **-EINVAL** if *sig* is invalid. ++ * ++ * **-EPERM** if no permission to send the *sig*. ++ * ++ * **-EAGAIN** if bpf program can try again. ++ * ++ * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) ++ * Description ++ * Try to issue a SYN cookie for the packet with corresponding ++ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. ++ * ++ * *iph* points to the start of the IPv4 or IPv6 header, while ++ * *iph_len* contains **sizeof**\ (**struct iphdr**) or ++ * **sizeof**\ (**struct ip6hdr**). ++ * ++ * *th* points to the start of the TCP header, while *th_len* ++ * contains the length of the TCP header. ++ * ++ * Return ++ * On success, lower 32 bits hold the generated SYN cookie in ++ * followed by 16 bits which hold the MSS value for that cookie, ++ * and the top 16 bits are unused. 
++ * ++ * On failure, the returned value is one of the following: ++ * ++ * **-EINVAL** SYN cookie cannot be issued due to error ++ * ++ * **-ENOENT** SYN cookie should not be issued (no SYN flood) ++ * ++ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies ++ * ++ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 ++ */ ++#define __BPF_FUNC_MAPPER(FN) \ ++ FN(unspec), \ ++ FN(map_lookup_elem), \ ++ FN(map_update_elem), \ ++ FN(map_delete_elem), \ ++ FN(probe_read), \ ++ FN(ktime_get_ns), \ ++ FN(trace_printk), \ ++ FN(get_prandom_u32), \ ++ FN(get_smp_processor_id), \ ++ FN(skb_store_bytes), \ ++ FN(l3_csum_replace), \ ++ FN(l4_csum_replace), \ ++ FN(tail_call), \ ++ FN(clone_redirect), \ ++ FN(get_current_pid_tgid), \ ++ FN(get_current_uid_gid), \ ++ FN(get_current_comm), \ ++ FN(get_cgroup_classid), \ ++ FN(skb_vlan_push), \ ++ FN(skb_vlan_pop), \ ++ FN(skb_get_tunnel_key), \ ++ FN(skb_set_tunnel_key), \ ++ FN(perf_event_read), \ ++ FN(redirect), \ ++ FN(get_route_realm), \ ++ FN(perf_event_output), \ ++ FN(skb_load_bytes), \ ++ FN(get_stackid), \ ++ FN(csum_diff), \ ++ FN(skb_get_tunnel_opt), \ ++ FN(skb_set_tunnel_opt), \ ++ FN(skb_change_proto), \ ++ FN(skb_change_type), \ ++ FN(skb_under_cgroup), \ ++ FN(get_hash_recalc), \ ++ FN(get_current_task), \ ++ FN(probe_write_user), \ ++ FN(current_task_under_cgroup), \ ++ FN(skb_change_tail), \ ++ FN(skb_pull_data), \ ++ FN(csum_update), \ ++ FN(set_hash_invalid), \ ++ FN(get_numa_node_id), \ ++ FN(skb_change_head), \ ++ FN(xdp_adjust_head), \ ++ FN(probe_read_str), \ ++ FN(get_socket_cookie), \ ++ FN(get_socket_uid), \ ++ FN(set_hash), \ ++ FN(setsockopt), \ ++ FN(skb_adjust_room), \ ++ FN(redirect_map), \ ++ FN(sk_redirect_map), \ ++ FN(sock_map_update), \ ++ FN(xdp_adjust_meta), \ ++ FN(perf_event_read_value), \ ++ FN(perf_prog_read_value), \ ++ FN(getsockopt), \ ++ FN(override_return), \ ++ FN(sock_ops_cb_flags_set), \ ++ FN(msg_redirect_map), \ ++ FN(msg_apply_bytes), \ ++ FN(msg_cork_bytes), \ ++ FN(msg_pull_data), \ ++ FN(bind), \ ++ FN(xdp_adjust_tail), \ ++ FN(skb_get_xfrm_state), \ ++ FN(get_stack), \ ++ FN(skb_load_bytes_relative), \ ++ FN(fib_lookup), \ ++ FN(sock_hash_update), \ ++ FN(msg_redirect_hash), \ ++ FN(sk_redirect_hash), \ ++ FN(lwt_push_encap), \ ++ FN(lwt_seg6_store_bytes), \ ++ FN(lwt_seg6_adjust_srh), \ ++ FN(lwt_seg6_action), \ ++ FN(rc_repeat), \ ++ FN(rc_keydown), \ ++ FN(skb_cgroup_id), \ ++ FN(get_current_cgroup_id), \ ++ FN(get_local_storage), \ ++ FN(sk_select_reuseport), \ ++ FN(skb_ancestor_cgroup_id), \ ++ FN(sk_lookup_tcp), \ ++ FN(sk_lookup_udp), \ ++ FN(sk_release), \ ++ FN(map_push_elem), \ ++ FN(map_pop_elem), \ ++ FN(map_peek_elem), \ ++ FN(msg_push_data), \ ++ FN(msg_pop_data), \ ++ FN(rc_pointer_rel), \ ++ FN(spin_lock), \ ++ FN(spin_unlock), \ ++ FN(sk_fullsock), \ ++ FN(tcp_sock), \ ++ FN(skb_ecn_set_ce), \ ++ FN(get_listener_sock), \ ++ FN(skc_lookup_tcp), \ ++ FN(tcp_check_syncookie), \ ++ FN(sysctl_get_name), \ ++ FN(sysctl_get_current_value), \ ++ FN(sysctl_get_new_value), \ ++ FN(sysctl_set_new_value), \ ++ FN(strtol), \ ++ FN(strtoul), \ ++ FN(sk_storage_get), \ ++ FN(sk_storage_delete), \ ++ FN(send_signal), \ ++ FN(tcp_gen_syncookie), ++ + /* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call + */ ++#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x + enum bpf_func_id { +- BPF_FUNC_unspec, +- BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ +- BPF_FUNC_map_update_elem, /* int map_update_elem(&map, 
&key, &value, flags) */ +- BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ +- BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ +- BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ +- BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ +- BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ +- BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ +- +- /** +- * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet +- * @skb: pointer to skb +- * @offset: offset within packet from skb->mac_header +- * @from: pointer where to copy bytes from +- * @len: number of bytes to store into packet +- * @flags: bit 0 - if true, recompute skb->csum +- * other bits - reserved +- * Return: 0 on success +- */ +- BPF_FUNC_skb_store_bytes, +- +- /** +- * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum +- * @skb: pointer to skb +- * @offset: offset within packet where IP checksum is located +- * @from: old value of header field +- * @to: new value of header field +- * @flags: bits 0-3 - size of header field +- * other bits - reserved +- * Return: 0 on success +- */ +- BPF_FUNC_l3_csum_replace, +- +- /** +- * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum +- * @skb: pointer to skb +- * @offset: offset within packet where TCP/UDP checksum is located +- * @from: old value of header field +- * @to: new value of header field +- * @flags: bits 0-3 - size of header field +- * bit 4 - is pseudo header +- * other bits - reserved +- * Return: 0 on success +- */ +- BPF_FUNC_l4_csum_replace, ++ __BPF_FUNC_MAPPER(__BPF_ENUM_FN) ++ __BPF_FUNC_MAX_ID, ++}; ++#undef __BPF_ENUM_FN + +- /** +- * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program +- * @ctx: context pointer passed to next program +- * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY +- * @index: index inside array that selects specific program to run +- * Return: 0 on success +- */ +- BPF_FUNC_tail_call, ++/* All flags used by eBPF helper functions, placed here. */ + +- /** +- * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev +- * @skb: pointer to skb +- * @ifindex: ifindex of the net device +- * @flags: bit 0 - if set, redirect to ingress instead of egress +- * other bits - reserved +- * Return: 0 on success +- */ +- BPF_FUNC_clone_redirect, ++/* BPF_FUNC_skb_store_bytes flags. */ ++#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) ++#define BPF_F_INVALIDATE_HASH (1ULL << 1) + +- /** +- * u64 bpf_get_current_pid_tgid(void) +- * Return: current->tgid << 32 | current->pid +- */ +- BPF_FUNC_get_current_pid_tgid, ++/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. ++ * First 4 bits are for passing the header field size. ++ */ ++#define BPF_F_HDR_FIELD_MASK 0xfULL + +- /** +- * u64 bpf_get_current_uid_gid(void) +- * Return: current_gid << 32 | current_uid +- */ +- BPF_FUNC_get_current_uid_gid, ++/* BPF_FUNC_l4_csum_replace flags. */ ++#define BPF_F_PSEUDO_HDR (1ULL << 4) ++#define BPF_F_MARK_MANGLED_0 (1ULL << 5) ++#define BPF_F_MARK_ENFORCE (1ULL << 6) ++ ++/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ ++#define BPF_F_INGRESS (1ULL << 0) ++ ++/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ ++#define BPF_F_TUNINFO_IPV6 (1ULL << 0) ++ ++/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. 
*/ ++#define BPF_F_SKIP_FIELD_MASK 0xffULL ++#define BPF_F_USER_STACK (1ULL << 8) ++/* flags used by BPF_FUNC_get_stackid only. */ ++#define BPF_F_FAST_STACK_CMP (1ULL << 9) ++#define BPF_F_REUSE_STACKID (1ULL << 10) ++/* flags used by BPF_FUNC_get_stack only. */ ++#define BPF_F_USER_BUILD_ID (1ULL << 11) ++ ++/* BPF_FUNC_skb_set_tunnel_key flags. */ ++#define BPF_F_ZERO_CSUM_TX (1ULL << 1) ++#define BPF_F_DONT_FRAGMENT (1ULL << 2) ++#define BPF_F_SEQ_NUMBER (1ULL << 3) + +- /** +- * bpf_get_current_comm(char *buf, int size_of_buf) +- * stores current->comm into buf +- * Return: 0 on success +- */ +- BPF_FUNC_get_current_comm, +- +- /** +- * bpf_get_cgroup_classid(skb) - retrieve a proc's classid +- * @skb: pointer to skb +- * Return: classid if != 0 +- */ +- BPF_FUNC_get_cgroup_classid, +- BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ +- BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ +- +- /** +- * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) +- * retrieve or populate tunnel metadata +- * @skb: pointer to skb +- * @key: pointer to 'struct bpf_tunnel_key' +- * @size: size of 'struct bpf_tunnel_key' +- * @flags: room for future extensions +- * Retrun: 0 on success +- */ +- BPF_FUNC_skb_get_tunnel_key, +- BPF_FUNC_skb_set_tunnel_key, +- BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ +- /** +- * bpf_redirect(ifindex, flags) - redirect to another netdev +- * @ifindex: ifindex of the net device +- * @flags: bit 0 - if set, redirect to ingress instead of egress +- * other bits - reserved +- * Return: TC_ACT_REDIRECT +- */ +- BPF_FUNC_redirect, ++/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and ++ * BPF_FUNC_perf_event_read_value flags. ++ */ ++#define BPF_F_INDEX_MASK 0xffffffffULL ++#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK ++/* BPF_FUNC_perf_event_output for sk_buff input context. */ ++#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) ++ ++/* Current network namespace */ ++#define BPF_F_CURRENT_NETNS (-1L) ++ ++/* BPF_FUNC_skb_adjust_room flags. */ ++#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) ++ ++#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff ++#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 ++ ++#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) ++#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) ++#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) ++#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) ++#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ ++ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ ++ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) ++ ++/* BPF_FUNC_sysctl_get_name flags. */ ++#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) ++ ++/* BPF_FUNC_sk_storage_get flags */ ++#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) ++ ++/* Mode for BPF_FUNC_skb_adjust_room helper. */ ++enum bpf_adj_room_mode { ++ BPF_ADJ_ROOM_NET, ++ BPF_ADJ_ROOM_MAC, ++}; + +- /** +- * bpf_get_route_realm(skb) - retrieve a dst's tclassid +- * @skb: pointer to skb +- * Return: realm if != 0 +- */ +- BPF_FUNC_get_route_realm, ++/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ ++enum bpf_hdr_start_off { ++ BPF_HDR_START_MAC, ++ BPF_HDR_START_NET, ++}; + +- /** +- * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample +- * @ctx: struct pt_regs* +- * @map: pointer to perf_event_array map +- * @index: index of event in the map +- * @data: data on stack to be output as raw data +- * @size: size of data +- * Return: 0 on success +- */ +- BPF_FUNC_perf_event_output, +- __BPF_FUNC_MAX_ID, ++/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. 
*/ ++enum bpf_lwt_encap_mode { ++ BPF_LWT_ENCAP_SEG6, ++ BPF_LWT_ENCAP_SEG6_INLINE, ++ BPF_LWT_ENCAP_IP, + }; + ++#define __bpf_md_ptr(type, name) \ ++union { \ ++ type name; \ ++ __u64 :64; \ ++} __attribute__((aligned(8))) ++ + /* user accessible mirror of in-kernel sk_buff. + * new fields can only be added to the end of this structure + */ +@@ -291,11 +2985,632 @@ struct __sk_buff { + __u32 cb[5]; + __u32 hash; + __u32 tc_classid; ++ __u32 data; ++ __u32 data_end; ++ __u32 napi_id; ++ ++ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ ++ __u32 family; ++ __u32 remote_ip4; /* Stored in network byte order */ ++ __u32 local_ip4; /* Stored in network byte order */ ++ __u32 remote_ip6[4]; /* Stored in network byte order */ ++ __u32 local_ip6[4]; /* Stored in network byte order */ ++ __u32 remote_port; /* Stored in network byte order */ ++ __u32 local_port; /* stored in host byte order */ ++ /* ... here. */ ++ ++ __u32 data_meta; ++ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); ++ __u64 tstamp; ++ __u32 wire_len; ++ __u32 gso_segs; ++ __bpf_md_ptr(struct bpf_sock *, sk); + }; + + struct bpf_tunnel_key { + __u32 tunnel_id; +- __u32 remote_ipv4; ++ union { ++ __u32 remote_ipv4; ++ __u32 remote_ipv6[4]; ++ }; ++ __u8 tunnel_tos; ++ __u8 tunnel_ttl; ++ __u16 tunnel_ext; /* Padding, future use. */ ++ __u32 tunnel_label; ++}; ++ ++/* user accessible mirror of in-kernel xfrm_state. ++ * new fields can only be added to the end of this structure ++ */ ++struct bpf_xfrm_state { ++ __u32 reqid; ++ __u32 spi; /* Stored in network byte order */ ++ __u16 family; ++ __u16 ext; /* Padding, future use. */ ++ union { ++ __u32 remote_ipv4; /* Stored in network byte order */ ++ __u32 remote_ipv6[4]; /* Stored in network byte order */ ++ }; ++}; ++ ++/* Generic BPF return codes which all BPF program types may support. ++ * The values are binary compatible with their TC_ACT_* counter-part to ++ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT ++ * programs. ++ * ++ * XDP is handled seprately, see XDP_*. ++ */ ++enum bpf_ret_code { ++ BPF_OK = 0, ++ /* 1 reserved */ ++ BPF_DROP = 2, ++ /* 3-6 reserved */ ++ BPF_REDIRECT = 7, ++ /* >127 are reserved for prog type specific return codes. ++ * ++ * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and ++ * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been ++ * changed and should be routed based on its new L3 header. ++ * (This is an L3 redirect, as opposed to L2 redirect ++ * represented by BPF_REDIRECT above). ++ */ ++ BPF_LWT_REROUTE = 128, ++}; ++ ++struct bpf_sock { ++ __u32 bound_dev_if; ++ __u32 family; ++ __u32 type; ++ __u32 protocol; ++ __u32 mark; ++ __u32 priority; ++ /* IP address also allows 1 and 2 bytes access */ ++ __u32 src_ip4; ++ __u32 src_ip6[4]; ++ __u32 src_port; /* host byte order */ ++ __u32 dst_port; /* network byte order */ ++ __u32 dst_ip4; ++ __u32 dst_ip6[4]; ++ __u32 state; ++}; ++ ++struct bpf_tcp_sock { ++ __u32 snd_cwnd; /* Sending congestion window */ ++ __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ ++ __u32 rtt_min; ++ __u32 snd_ssthresh; /* Slow start size threshold */ ++ __u32 rcv_nxt; /* What we want to receive next */ ++ __u32 snd_nxt; /* Next sequence we send */ ++ __u32 snd_una; /* First byte we want an ack for */ ++ __u32 mss_cache; /* Cached effective mss, not including SACKS */ ++ __u32 ecn_flags; /* ECN status bits. 
*/ ++ __u32 rate_delivered; /* saved rate sample: packets delivered */ ++ __u32 rate_interval_us; /* saved rate sample: time elapsed */ ++ __u32 packets_out; /* Packets which are "in flight" */ ++ __u32 retrans_out; /* Retransmitted packets out */ ++ __u32 total_retrans; /* Total retransmits for entire connection */ ++ __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn ++ * total number of segments in. ++ */ ++ __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn ++ * total number of data segments in. ++ */ ++ __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut ++ * The total number of segments sent. ++ */ ++ __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut ++ * total number of data segments sent. ++ */ ++ __u32 lost_out; /* Lost packets */ ++ __u32 sacked_out; /* SACK'd packets */ ++ __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived ++ * sum(delta(rcv_nxt)), or how many bytes ++ * were acked. ++ */ ++ __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked ++ * sum(delta(snd_una)), or how many bytes ++ * were acked. ++ */ ++ __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups ++ * total number of DSACK blocks received ++ */ ++ __u32 delivered; /* Total data packets delivered incl. rexmits */ ++ __u32 delivered_ce; /* Like the above but only ECE marked packets */ ++ __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ ++}; ++ ++struct bpf_sock_tuple { ++ union { ++ struct { ++ __be32 saddr; ++ __be32 daddr; ++ __be16 sport; ++ __be16 dport; ++ } ipv4; ++ struct { ++ __be32 saddr[4]; ++ __be32 daddr[4]; ++ __be16 sport; ++ __be16 dport; ++ } ipv6; ++ }; ++}; ++ ++struct bpf_xdp_sock { ++ __u32 queue_id; ++}; ++ ++#define XDP_PACKET_HEADROOM 256 ++ ++/* User return codes for XDP prog type. ++ * A valid XDP program must return one of these defined values. All other ++ * return codes are reserved for future use. Unknown return codes will ++ * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). ++ */ ++enum xdp_action { ++ XDP_ABORTED = 0, ++ XDP_DROP, ++ XDP_PASS, ++ XDP_TX, ++ XDP_REDIRECT, ++}; ++ ++/* user accessible metadata for XDP packet hook ++ * new fields must be added to the end of this structure ++ */ ++struct xdp_md { ++ __u32 data; ++ __u32 data_end; ++ __u32 data_meta; ++ /* Below access go through struct xdp_rxq_info */ ++ __u32 ingress_ifindex; /* rxq->dev->ifindex */ ++ __u32 rx_queue_index; /* rxq->queue_index */ ++}; ++ ++enum sk_action { ++ SK_DROP = 0, ++ SK_PASS, ++}; ++ ++/* user accessible metadata for SK_MSG packet hook, new fields must ++ * be added to the end of this structure ++ */ ++struct sk_msg_md { ++ __bpf_md_ptr(void *, data); ++ __bpf_md_ptr(void *, data_end); ++ ++ __u32 family; ++ __u32 remote_ip4; /* Stored in network byte order */ ++ __u32 local_ip4; /* Stored in network byte order */ ++ __u32 remote_ip6[4]; /* Stored in network byte order */ ++ __u32 local_ip6[4]; /* Stored in network byte order */ ++ __u32 remote_port; /* Stored in network byte order */ ++ __u32 local_port; /* stored in host byte order */ ++ __u32 size; /* Total size of sk_msg */ ++}; ++ ++struct sk_reuseport_md { ++ /* ++ * Start of directly accessible data. It begins from ++ * the tcp/udp header. ++ */ ++ __bpf_md_ptr(void *, data); ++ /* End of directly accessible data */ ++ __bpf_md_ptr(void *, data_end); ++ /* ++ * Total length of packet (starting from the tcp/udp header). ++ * Note that the directly accessible bytes (data_end - data) ++ * could be less than this "len". 
Those bytes could be ++ * indirectly read by a helper "bpf_skb_load_bytes()". ++ */ ++ __u32 len; ++ /* ++ * Eth protocol in the mac header (network byte order). e.g. ++ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD) ++ */ ++ __u32 eth_protocol; ++ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ ++ __u32 bind_inany; /* Is sock bound to an INANY address? */ ++ __u32 hash; /* A hash of the packet 4 tuples */ ++}; ++ ++#define BPF_TAG_SIZE 8 ++ ++struct bpf_prog_info { ++ __u32 type; ++ __u32 id; ++ __u8 tag[BPF_TAG_SIZE]; ++ __u32 jited_prog_len; ++ __u32 xlated_prog_len; ++ __aligned_u64 jited_prog_insns; ++ __aligned_u64 xlated_prog_insns; ++ __u64 load_time; /* ns since boottime */ ++ __u32 created_by_uid; ++ __u32 nr_map_ids; ++ __aligned_u64 map_ids; ++ char name[BPF_OBJ_NAME_LEN]; ++ __u32 ifindex; ++ __u32 gpl_compatible:1; ++ __u32 :31; /* alignment pad */ ++ __u64 netns_dev; ++ __u64 netns_ino; ++ __u32 nr_jited_ksyms; ++ __u32 nr_jited_func_lens; ++ __aligned_u64 jited_ksyms; ++ __aligned_u64 jited_func_lens; ++ __u32 btf_id; ++ __u32 func_info_rec_size; ++ __aligned_u64 func_info; ++ __u32 nr_func_info; ++ __u32 nr_line_info; ++ __aligned_u64 line_info; ++ __aligned_u64 jited_line_info; ++ __u32 nr_jited_line_info; ++ __u32 line_info_rec_size; ++ __u32 jited_line_info_rec_size; ++ __u32 nr_prog_tags; ++ __aligned_u64 prog_tags; ++ __u64 run_time_ns; ++ __u64 run_cnt; ++} __attribute__((aligned(8))); ++ ++struct bpf_map_info { ++ __u32 type; ++ __u32 id; ++ __u32 key_size; ++ __u32 value_size; ++ __u32 max_entries; ++ __u32 map_flags; ++ char name[BPF_OBJ_NAME_LEN]; ++ __u32 ifindex; ++ __u32 :32; ++ __u64 netns_dev; ++ __u64 netns_ino; ++ __u32 btf_id; ++ __u32 btf_key_type_id; ++ __u32 btf_value_type_id; ++} __attribute__((aligned(8))); ++ ++struct bpf_btf_info { ++ __aligned_u64 btf; ++ __u32 btf_size; ++ __u32 id; ++} __attribute__((aligned(8))); ++ ++/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed ++ * by user and intended to be used by socket (e.g. to bind to, depends on ++ * attach attach type). ++ */ ++struct bpf_sock_addr { ++ __u32 user_family; /* Allows 4-byte read, but no write. */ ++ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. ++ * Stored in network byte order. ++ */ ++ __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. ++ * Stored in network byte order. ++ */ ++ __u32 user_port; /* Allows 4-byte read and write. ++ * Stored in network byte order ++ */ ++ __u32 family; /* Allows 4-byte read, but no write */ ++ __u32 type; /* Allows 4-byte read, but no write */ ++ __u32 protocol; /* Allows 4-byte read, but no write */ ++ __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. ++ * Stored in network byte order. ++ */ ++ __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. ++ * Stored in network byte order. ++ */ ++ __bpf_md_ptr(struct bpf_sock *, sk); ++}; ++ ++/* User bpf_sock_ops struct to access socket values and specify request ops ++ * and their replies. ++ * Some of this fields are in network (bigendian) byte order and may need ++ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). 
++ * New fields can only be added at the end of this structure ++ */ ++struct bpf_sock_ops { ++ __u32 op; ++ union { ++ __u32 args[4]; /* Optionally passed to bpf program */ ++ __u32 reply; /* Returned by bpf program */ ++ __u32 replylong[4]; /* Optionally returned by bpf prog */ ++ }; ++ __u32 family; ++ __u32 remote_ip4; /* Stored in network byte order */ ++ __u32 local_ip4; /* Stored in network byte order */ ++ __u32 remote_ip6[4]; /* Stored in network byte order */ ++ __u32 local_ip6[4]; /* Stored in network byte order */ ++ __u32 remote_port; /* Stored in network byte order */ ++ __u32 local_port; /* stored in host byte order */ ++ __u32 is_fullsock; /* Some TCP fields are only valid if ++ * there is a full socket. If not, the ++ * fields read as zero. ++ */ ++ __u32 snd_cwnd; ++ __u32 srtt_us; /* Averaged RTT << 3 in usecs */ ++ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ ++ __u32 state; ++ __u32 rtt_min; ++ __u32 snd_ssthresh; ++ __u32 rcv_nxt; ++ __u32 snd_nxt; ++ __u32 snd_una; ++ __u32 mss_cache; ++ __u32 ecn_flags; ++ __u32 rate_delivered; ++ __u32 rate_interval_us; ++ __u32 packets_out; ++ __u32 retrans_out; ++ __u32 total_retrans; ++ __u32 segs_in; ++ __u32 data_segs_in; ++ __u32 segs_out; ++ __u32 data_segs_out; ++ __u32 lost_out; ++ __u32 sacked_out; ++ __u32 sk_txhash; ++ __u64 bytes_received; ++ __u64 bytes_acked; ++ __bpf_md_ptr(struct bpf_sock *, sk); ++}; ++ ++/* Definitions for bpf_sock_ops_cb_flags */ ++#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) ++#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) ++#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) ++#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) ++#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently ++ * supported cb flags ++ */ ++ ++/* List of known BPF sock_ops operators. ++ * New entries can only be added at the end ++ */ ++enum { ++ BPF_SOCK_OPS_VOID, ++ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or ++ * -1 if default value should be used ++ */ ++ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized ++ * window (in packets) or -1 if default ++ * value should be used ++ */ ++ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an ++ * active connection is initialized ++ */ ++ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an ++ * active connection is ++ * established ++ */ ++ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a ++ * passive connection is ++ * established ++ */ ++ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control ++ * needs ECN ++ */ ++ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is ++ * based on the path and may be ++ * dependent on the congestion control ++ * algorithm. In general it indicates ++ * a congestion threshold. RTTs above ++ * this indicate congestion ++ */ ++ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. ++ * Arg1: value of icsk_retransmits ++ * Arg2: value of icsk_rto ++ * Arg3: whether RTO has expired ++ */ ++ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. ++ * Arg1: sequence number of 1st byte ++ * Arg2: # segments ++ * Arg3: return value of ++ * tcp_transmit_skb (0 => success) ++ */ ++ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. ++ * Arg1: old_state ++ * Arg2: new_state ++ */ ++ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after ++ * socket transition to LISTEN state. ++ */ ++ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. ++ */ ++}; ++ ++/* List of TCP states. 
There is a build check in net/ipv4/tcp.c to detect ++ * changes between the TCP and BPF versions. Ideally this should never happen. ++ * If it does, we need to add code to convert them before calling ++ * the BPF sock_ops function. ++ */ ++enum { ++ BPF_TCP_ESTABLISHED = 1, ++ BPF_TCP_SYN_SENT, ++ BPF_TCP_SYN_RECV, ++ BPF_TCP_FIN_WAIT1, ++ BPF_TCP_FIN_WAIT2, ++ BPF_TCP_TIME_WAIT, ++ BPF_TCP_CLOSE, ++ BPF_TCP_CLOSE_WAIT, ++ BPF_TCP_LAST_ACK, ++ BPF_TCP_LISTEN, ++ BPF_TCP_CLOSING, /* Now a valid state */ ++ BPF_TCP_NEW_SYN_RECV, ++ ++ BPF_TCP_MAX_STATES /* Leave at the end! */ ++}; ++ ++#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ ++#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ ++ ++struct bpf_perf_event_value { ++ __u64 counter; ++ __u64 enabled; ++ __u64 running; ++}; ++ ++#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) ++#define BPF_DEVCG_ACC_READ (1ULL << 1) ++#define BPF_DEVCG_ACC_WRITE (1ULL << 2) ++ ++#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) ++#define BPF_DEVCG_DEV_CHAR (1ULL << 1) ++ ++struct bpf_cgroup_dev_ctx { ++ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ ++ __u32 access_type; ++ __u32 major; ++ __u32 minor; ++}; ++ ++struct bpf_raw_tracepoint_args { ++ __u64 args[0]; ++}; ++ ++/* DIRECT: Skip the FIB rules and go to FIB table associated with device ++ * OUTPUT: Do lookup from egress perspective; default is ingress ++ */ ++#define BPF_FIB_LOOKUP_DIRECT (1U << 0) ++#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) ++ ++enum { ++ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ ++ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ ++ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ ++ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ ++ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ ++ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ ++ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ ++ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ ++ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ ++}; ++ ++struct bpf_fib_lookup { ++ /* input: network family for lookup (AF_INET, AF_INET6) ++ * output: network family of egress nexthop ++ */ ++ __u8 family; ++ ++ /* set if lookup is to consider L4 data - e.g., FIB rules */ ++ __u8 l4_protocol; ++ __be16 sport; ++ __be16 dport; ++ ++ /* total length of packet from network header - used for MTU check */ ++ __u16 tot_len; ++ ++ /* input: L3 device index for lookup ++ * output: device index from FIB lookup ++ */ ++ __u32 ifindex; ++ ++ union { ++ /* inputs to lookup */ ++ __u8 tos; /* AF_INET */ ++ __be32 flowinfo; /* AF_INET6, flow_label + priority */ ++ ++ /* output: metric of fib result (IPv4/IPv6 only) */ ++ __u32 rt_metric; ++ }; ++ ++ union { ++ __be32 ipv4_src; ++ __u32 ipv6_src[4]; /* in6_addr; network order */ ++ }; ++ ++ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in ++ * network header. 
output: bpf_fib_lookup sets to gateway address ++ * if FIB lookup returns gateway route ++ */ ++ union { ++ __be32 ipv4_dst; ++ __u32 ipv6_dst[4]; /* in6_addr; network order */ ++ }; ++ ++ /* output */ ++ __be16 h_vlan_proto; ++ __be16 h_vlan_TCI; ++ __u8 smac[6]; /* ETH_ALEN */ ++ __u8 dmac[6]; /* ETH_ALEN */ ++}; ++ ++enum bpf_task_fd_type { ++ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ ++ BPF_FD_TYPE_TRACEPOINT, /* tp name */ ++ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */ ++ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ ++ BPF_FD_TYPE_UPROBE, /* filename + offset */ ++ BPF_FD_TYPE_URETPROBE, /* filename + offset */ ++}; ++ ++#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) ++#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) ++#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) ++ ++struct bpf_flow_keys { ++ __u16 nhoff; ++ __u16 thoff; ++ __u16 addr_proto; /* ETH_P_* of valid addrs */ ++ __u8 is_frag; ++ __u8 is_first_frag; ++ __u8 is_encap; ++ __u8 ip_proto; ++ __be16 n_proto; ++ __be16 sport; ++ __be16 dport; ++ union { ++ struct { ++ __be32 ipv4_src; ++ __be32 ipv4_dst; ++ }; ++ struct { ++ __u32 ipv6_src[4]; /* in6_addr; network order */ ++ __u32 ipv6_dst[4]; /* in6_addr; network order */ ++ }; ++ }; ++ __u32 flags; ++ __be32 flow_label; ++}; ++ ++struct bpf_func_info { ++ __u32 insn_off; ++ __u32 type_id; ++}; ++ ++#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) ++#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) ++ ++struct bpf_line_info { ++ __u32 insn_off; ++ __u32 file_name_off; ++ __u32 line_off; ++ __u32 line_col; ++}; ++ ++struct bpf_spin_lock { ++ __u32 val; ++}; ++ ++struct bpf_sysctl { ++ __u32 write; /* Sysctl is being read (= 0) or written (= 1). ++ * Allows 1,2,4-byte read, but no write. ++ */ ++ __u32 file_pos; /* Sysctl file position to read from, write to. ++ * Allows 1,2,4-byte read an 4-byte write. ++ */ ++}; ++ ++struct bpf_sockopt { ++ __bpf_md_ptr(struct bpf_sock *, sk); ++ __bpf_md_ptr(void *, optval); ++ __bpf_md_ptr(void *, optval_end); ++ ++ __s32 level; ++ __s32 optname; ++ __s32 optlen; ++ __s32 retval; + }; + + #endif /* _UAPI__LINUX_BPF_H__ */ +--- /dev/null ++++ b/include/uapi/linux/bpfilter.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _UAPI_LINUX_BPFILTER_H ++#define _UAPI_LINUX_BPFILTER_H ++ ++#include ++ ++enum { ++ BPFILTER_IPT_SO_SET_REPLACE = 64, ++ BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65, ++ BPFILTER_IPT_SET_MAX, ++}; ++ ++enum { ++ BPFILTER_IPT_SO_GET_INFO = 64, ++ BPFILTER_IPT_SO_GET_ENTRIES = 65, ++ BPFILTER_IPT_SO_GET_REVISION_MATCH = 66, ++ BPFILTER_IPT_SO_GET_REVISION_TARGET = 67, ++ BPFILTER_IPT_GET_MAX, ++}; ++ ++#endif /* _UAPI_LINUX_BPFILTER_H */ +--- /dev/null ++++ b/include/uapi/linux/bpf_perf_event.h +@@ -0,0 +1,19 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* Copyright (c) 2016 Facebook ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of version 2 of the GNU General Public ++ * License as published by the Free Software Foundation. 
++ */ ++#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ ++#define _UAPI__LINUX_BPF_PERF_EVENT_H__ ++ ++#include ++ ++struct bpf_perf_event_data { ++ bpf_user_pt_regs_t regs; ++ __u64 sample_period; ++ __u64 addr; ++}; ++ ++#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ +--- /dev/null ++++ b/include/uapi/linux/btf.h +@@ -0,0 +1,165 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* Copyright (c) 2018 Facebook */ ++#ifndef _UAPI__LINUX_BTF_H__ ++#define _UAPI__LINUX_BTF_H__ ++ ++#include ++ ++#define BTF_MAGIC 0xeB9F ++#define BTF_VERSION 1 ++ ++struct btf_header { ++ __u16 magic; ++ __u8 version; ++ __u8 flags; ++ __u32 hdr_len; ++ ++ /* All offsets are in bytes relative to the end of this header */ ++ __u32 type_off; /* offset of type section */ ++ __u32 type_len; /* length of type section */ ++ __u32 str_off; /* offset of string section */ ++ __u32 str_len; /* length of string section */ ++}; ++ ++/* Max # of type identifier */ ++#define BTF_MAX_TYPE 0x000fffff ++/* Max offset into the string section */ ++#define BTF_MAX_NAME_OFFSET 0x00ffffff ++/* Max # of struct/union/enum members or func args */ ++#define BTF_MAX_VLEN 0xffff ++ ++struct btf_type { ++ __u32 name_off; ++ /* "info" bits arrangement ++ * bits 0-15: vlen (e.g. # of struct's members) ++ * bits 16-23: unused ++ * bits 24-27: kind (e.g. int, ptr, array...etc) ++ * bits 28-30: unused ++ * bit 31: kind_flag, currently used by ++ * struct, union and fwd ++ */ ++ __u32 info; ++ /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. ++ * "size" tells the size of the type it is describing. ++ * ++ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, ++ * FUNC, FUNC_PROTO and VAR. ++ * "type" is a type_id referring to another type. ++ */ ++ union { ++ __u32 size; ++ __u32 type; ++ }; ++}; ++ ++#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) ++#define BTF_INFO_VLEN(info) ((info) & 0xffff) ++#define BTF_INFO_KFLAG(info) ((info) >> 31) ++ ++#define BTF_KIND_UNKN 0 /* Unknown */ ++#define BTF_KIND_INT 1 /* Integer */ ++#define BTF_KIND_PTR 2 /* Pointer */ ++#define BTF_KIND_ARRAY 3 /* Array */ ++#define BTF_KIND_STRUCT 4 /* Struct */ ++#define BTF_KIND_UNION 5 /* Union */ ++#define BTF_KIND_ENUM 6 /* Enumeration */ ++#define BTF_KIND_FWD 7 /* Forward */ ++#define BTF_KIND_TYPEDEF 8 /* Typedef */ ++#define BTF_KIND_VOLATILE 9 /* Volatile */ ++#define BTF_KIND_CONST 10 /* Const */ ++#define BTF_KIND_RESTRICT 11 /* Restrict */ ++#define BTF_KIND_FUNC 12 /* Function */ ++#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ ++#define BTF_KIND_VAR 14 /* Variable */ ++#define BTF_KIND_DATASEC 15 /* Section */ ++#define BTF_KIND_MAX BTF_KIND_DATASEC ++#define NR_BTF_KINDS (BTF_KIND_MAX + 1) ++ ++/* For some specific BTF_KIND, "struct btf_type" is immediately ++ * followed by extra data. ++ */ ++ ++/* BTF_KIND_INT is followed by a u32 and the following ++ * is the 32 bits arrangement: ++ */ ++#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) ++#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16) ++#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) ++ ++/* Attributes stored in the BTF_INT_ENCODING */ ++#define BTF_INT_SIGNED (1 << 0) ++#define BTF_INT_CHAR (1 << 1) ++#define BTF_INT_BOOL (1 << 2) ++ ++/* BTF_KIND_ENUM is followed by multiple "struct btf_enum". ++ * The exact number of btf_enum is stored in the vlen (of the ++ * info in "struct btf_type"). 
++ */ ++struct btf_enum { ++ __u32 name_off; ++ __s32 val; ++}; ++ ++/* BTF_KIND_ARRAY is followed by one "struct btf_array" */ ++struct btf_array { ++ __u32 type; ++ __u32 index_type; ++ __u32 nelems; ++}; ++ ++/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed ++ * by multiple "struct btf_member". The exact number ++ * of btf_member is stored in the vlen (of the info in ++ * "struct btf_type"). ++ */ ++struct btf_member { ++ __u32 name_off; ++ __u32 type; ++ /* If the type info kind_flag is set, the btf_member offset ++ * contains both member bitfield size and bit offset. The ++ * bitfield size is set for bitfield members. If the type ++ * info kind_flag is not set, the offset contains only bit ++ * offset. ++ */ ++ __u32 offset; ++}; ++ ++/* If the struct/union type info kind_flag is set, the ++ * following two macros are used to access bitfield_size ++ * and bit_offset from btf_member.offset. ++ */ ++#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) ++#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) ++ ++/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". ++ * The exact number of btf_param is stored in the vlen (of the ++ * info in "struct btf_type"). ++ */ ++struct btf_param { ++ __u32 name_off; ++ __u32 type; ++}; ++ ++enum { ++ BTF_VAR_STATIC = 0, ++ BTF_VAR_GLOBAL_ALLOCATED, ++}; ++ ++/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe ++ * additional information related to the variable such as its linkage. ++ */ ++struct btf_var { ++ __u32 linkage; ++}; ++ ++/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo" ++ * to describe all BTF_KIND_VAR types it contains along with it's ++ * in-section offset as well as size. ++ */ ++struct btf_var_secinfo { ++ __u32 type; ++ __u32 offset; ++ __u32 size; ++}; ++ ++#endif /* _UAPI__LINUX_BTF_H__ */ +--- a/kernel/bpf/arraymap.c ++++ b/kernel/bpf/arraymap.c +@@ -1,78 +1,141 @@ ++// SPDX-License-Identifier: GPL-2.0-only + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of version 2 of the GNU General Public +- * License as published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, but +- * WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. 
++ * Copyright (c) 2016,2017 Facebook + */ + #include ++#include + #include +-#include + #include + #include + #include + #include ++#include ++ ++#include "map_in_map.h" ++ ++#define ARRAY_CREATE_FLAG_MASK \ ++ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) ++ ++static void bpf_array_free_percpu(struct bpf_array *array) ++{ ++ int i; ++ ++ for (i = 0; i < array->map.max_entries; i++) { ++ free_percpu(array->pptrs[i]); ++ cond_resched(); ++ } ++} ++ ++static int bpf_array_alloc_percpu(struct bpf_array *array) ++{ ++ void __percpu *ptr; ++ int i; ++ ++ for (i = 0; i < array->map.max_entries; i++) { ++ ptr = __alloc_percpu_gfp(array->elem_size, 8, ++ GFP_USER | __GFP_NOWARN); ++ if (!ptr) { ++ bpf_array_free_percpu(array); ++ return -ENOMEM; ++ } ++ array->pptrs[i] = ptr; ++ cond_resched(); ++ } ++ ++ return 0; ++} + + /* Called from syscall */ +-static struct bpf_map *array_map_alloc(union bpf_attr *attr) ++int array_map_alloc_check(union bpf_attr *attr) + { +- struct bpf_array *array; +- u32 elem_size, array_size; +- u32 index_mask, max_entries; +- bool unpriv = !capable(CAP_SYS_ADMIN); ++ bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; ++ int numa_node = bpf_map_attr_numa_node(attr); + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || +- attr->value_size == 0) +- return ERR_PTR(-EINVAL); ++ attr->value_size == 0 || ++ attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || ++ !bpf_map_flags_access_ok(attr->map_flags) || ++ (percpu && numa_node != NUMA_NO_NODE)) ++ return -EINVAL; + +- if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1)) ++ if (attr->value_size > KMALLOC_MAX_SIZE) + /* if value_size is bigger, the user space won't be able to + * access the elements. + */ +- return ERR_PTR(-E2BIG); ++ return -E2BIG; ++ ++ return 0; ++} ++ ++static struct bpf_map *array_map_alloc(union bpf_attr *attr) ++{ ++ bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; ++ int ret, numa_node = bpf_map_attr_numa_node(attr); ++ u32 elem_size, index_mask, max_entries; ++ bool unpriv = !capable(CAP_SYS_ADMIN); ++ u64 cost, array_size, mask64; ++ struct bpf_map_memory mem; ++ struct bpf_array *array; + + elem_size = round_up(attr->value_size, 8); + + max_entries = attr->max_entries; +- index_mask = roundup_pow_of_two(max_entries) - 1; + +- if (unpriv) ++ /* On 32 bit archs roundup_pow_of_two() with max_entries that has ++ * upper most bit set in u32 space is undefined behavior due to ++ * resulting 1U << 32, so do it manually here in u64 space. ++ */ ++ mask64 = fls_long(max_entries - 1); ++ mask64 = 1ULL << mask64; ++ mask64 -= 1; ++ ++ index_mask = mask64; ++ if (unpriv) { + /* round up array size to nearest power of 2, + * since cpu will speculate within index_mask limits + */ + max_entries = index_mask + 1; ++ /* Check for overflows. 
*/ ++ if (max_entries < attr->max_entries) ++ return ERR_PTR(-E2BIG); ++ } + +- +- /* check round_up into zero and u32 overflow */ +- if (elem_size == 0 || +- attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) +- return ERR_PTR(-ENOMEM); +- +- array_size = sizeof(*array) + max_entries * elem_size; ++ array_size = sizeof(*array); ++ if (percpu) ++ array_size += (u64) max_entries * sizeof(void *); ++ else ++ array_size += (u64) max_entries * elem_size; ++ ++ /* make sure there is no u32 overflow later in round_up() */ ++ cost = array_size; ++ if (percpu) ++ cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); ++ ++ ret = bpf_map_charge_init(&mem, cost); ++ if (ret < 0) ++ return ERR_PTR(ret); + + /* allocate all map elements and zero-initialize them */ +- array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); ++ array = bpf_map_area_alloc(array_size, numa_node); + if (!array) { +- array = vzalloc(array_size); +- if (!array) +- return ERR_PTR(-ENOMEM); ++ bpf_map_charge_finish(&mem); ++ return ERR_PTR(-ENOMEM); + } +- + array->index_mask = index_mask; + array->map.unpriv_array = unpriv; + + /* copy mandatory map attributes */ +- array->map.key_size = attr->key_size; +- array->map.value_size = attr->value_size; +- array->map.max_entries = attr->max_entries; +- array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; ++ bpf_map_init_from_attr(&array->map, attr); ++ bpf_map_charge_move(&array->map.memory, &mem); + array->elem_size = elem_size; + ++ if (percpu && bpf_array_alloc_percpu(array)) { ++ bpf_map_charge_finish(&array->map.memory); ++ bpf_map_area_free(array); ++ return ERR_PTR(-ENOMEM); ++ } ++ + return &array->map; + } + +@@ -82,17 +145,115 @@ static void *array_map_lookup_elem(struc + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + +- if (index >= array->map.max_entries) ++ if (unlikely(index >= array->map.max_entries)) + return NULL; + + return array->value + array->elem_size * (index & array->index_mask); + } + ++static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, ++ u32 off) ++{ ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ ++ if (map->max_entries != 1) ++ return -ENOTSUPP; ++ if (off >= map->value_size) ++ return -EINVAL; ++ ++ *imm = (unsigned long)array->value; ++ return 0; ++} ++ ++static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm, ++ u32 *off) ++{ ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ u64 base = (unsigned long)array->value; ++ u64 range = array->elem_size; ++ ++ if (map->max_entries != 1) ++ return -ENOTSUPP; ++ if (imm < base || imm >= base + range) ++ return -ENOENT; ++ ++ *off = imm - base; ++ return 0; ++} ++ ++/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ ++static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) ++{ ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ struct bpf_insn *insn = insn_buf; ++ u32 elem_size = round_up(map->value_size, 8); ++ const int ret = BPF_REG_0; ++ const int map_ptr = BPF_REG_1; ++ const int index = BPF_REG_2; ++ ++ *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); ++ *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); ++ if (map->unpriv_array) { ++ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); ++ *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); ++ } else { ++ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); ++ } ++ 
++ if (is_power_of_2(elem_size)) { ++ *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); ++ } else { ++ *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); ++ } ++ *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); ++ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); ++ *insn++ = BPF_MOV64_IMM(ret, 0); ++ return insn - insn_buf; ++} ++ ++/* Called from eBPF program */ ++static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ u32 index = *(u32 *)key; ++ ++ if (unlikely(index >= array->map.max_entries)) ++ return NULL; ++ ++ return this_cpu_ptr(array->pptrs[index & array->index_mask]); ++} ++ ++int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) ++{ ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ u32 index = *(u32 *)key; ++ void __percpu *pptr; ++ int cpu, off = 0; ++ u32 size; ++ ++ if (unlikely(index >= array->map.max_entries)) ++ return -ENOENT; ++ ++ /* per_cpu areas are zero-filled and bpf programs can only ++ * access 'value_size' of them, so copying rounded areas ++ * will not leak any kernel data ++ */ ++ size = round_up(map->value_size, 8); ++ rcu_read_lock(); ++ pptr = array->pptrs[index & array->index_mask]; ++ for_each_possible_cpu(cpu) { ++ bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); ++ off += size; ++ } ++ rcu_read_unlock(); ++ return 0; ++} ++ + /* Called from syscall */ + static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) + { + struct bpf_array *array = container_of(map, struct bpf_array, map); +- u32 index = *(u32 *)key; ++ u32 index = key ? *(u32 *)key : U32_MAX; + u32 *next = (u32 *)next_key; + + if (index >= array->map.max_entries) { +@@ -113,22 +274,73 @@ static int array_map_update_elem(struct + { + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; ++ char *val; + +- if (map_flags > BPF_EXIST) ++ if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) + /* unknown flags */ + return -EINVAL; + +- if (index >= array->map.max_entries) ++ if (unlikely(index >= array->map.max_entries)) ++ /* all elements were pre-allocated, cannot insert a new one */ ++ return -E2BIG; ++ ++ if (unlikely(map_flags & BPF_NOEXIST)) ++ /* all elements already exist */ ++ return -EEXIST; ++ ++ if (unlikely((map_flags & BPF_F_LOCK) && ++ !map_value_has_spin_lock(map))) ++ return -EINVAL; ++ ++ if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { ++ memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), ++ value, map->value_size); ++ } else { ++ val = array->value + ++ array->elem_size * (index & array->index_mask); ++ if (map_flags & BPF_F_LOCK) ++ copy_map_value_locked(map, val, value, false); ++ else ++ copy_map_value(map, val, value); ++ } ++ return 0; ++} ++ ++int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, ++ u64 map_flags) ++{ ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ u32 index = *(u32 *)key; ++ void __percpu *pptr; ++ int cpu, off = 0; ++ u32 size; ++ ++ if (unlikely(map_flags > BPF_EXIST)) ++ /* unknown flags */ ++ return -EINVAL; ++ ++ if (unlikely(index >= array->map.max_entries)) + /* all elements were pre-allocated, cannot insert a new one */ + return -E2BIG; + +- if (map_flags == BPF_NOEXIST) ++ if (unlikely(map_flags == BPF_NOEXIST)) + /* all elements already exist */ + return -EEXIST; + +- memcpy(array->value + +- array->elem_size * (index & array->index_mask), +- value, map->value_size); ++ /* the 
user space will provide round_up(value_size, 8) bytes that ++ * will be copied into per-cpu area. bpf programs can only access ++ * value_size of it. During lookup the same extra bytes will be ++ * returned or zeros which were zero-filled by percpu_alloc, ++ * so no kernel data leaks possible ++ */ ++ size = round_up(map->value_size, 8); ++ rcu_read_lock(); ++ pptr = array->pptrs[index & array->index_mask]; ++ for_each_possible_cpu(cpu) { ++ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); ++ off += size; ++ } ++ rcu_read_unlock(); + return 0; + } + +@@ -150,36 +362,124 @@ static void array_map_free(struct bpf_ma + */ + synchronize_rcu(); + +- kvfree(array); ++ if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) ++ bpf_array_free_percpu(array); ++ ++ bpf_map_area_free(array); + } + +-static const struct bpf_map_ops array_ops = { ++static void array_map_seq_show_elem(struct bpf_map *map, void *key, ++ struct seq_file *m) ++{ ++ void *value; ++ ++ rcu_read_lock(); ++ ++ value = array_map_lookup_elem(map, key); ++ if (!value) { ++ rcu_read_unlock(); ++ return; ++ } ++ ++ if (map->btf_key_type_id) ++ seq_printf(m, "%u: ", *(u32 *)key); ++ btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); ++ seq_puts(m, "\n"); ++ ++ rcu_read_unlock(); ++} ++ ++static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, ++ struct seq_file *m) ++{ ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ u32 index = *(u32 *)key; ++ void __percpu *pptr; ++ int cpu; ++ ++ rcu_read_lock(); ++ ++ seq_printf(m, "%u: {\n", *(u32 *)key); ++ pptr = array->pptrs[index & array->index_mask]; ++ for_each_possible_cpu(cpu) { ++ seq_printf(m, "\tcpu%d: ", cpu); ++ btf_type_seq_show(map->btf, map->btf_value_type_id, ++ per_cpu_ptr(pptr, cpu), m); ++ seq_puts(m, "\n"); ++ } ++ seq_puts(m, "}\n"); ++ ++ rcu_read_unlock(); ++} ++ ++static int array_map_check_btf(const struct bpf_map *map, ++ const struct btf *btf, ++ const struct btf_type *key_type, ++ const struct btf_type *value_type) ++{ ++ u32 int_data; ++ ++ /* One exception for keyless BTF: .bss/.data/.rodata map */ ++ if (btf_type_is_void(key_type)) { ++ if (map->map_type != BPF_MAP_TYPE_ARRAY || ++ map->max_entries != 1) ++ return -EINVAL; ++ ++ if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC) ++ return -EINVAL; ++ ++ return 0; ++ } ++ ++ if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) ++ return -EINVAL; ++ ++ int_data = *(u32 *)(key_type + 1); ++ /* bpf array can only take a u32 key. This check makes sure ++ * that the btf matches the attr used during map_create. 
++ */ ++ if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++const struct bpf_map_ops array_map_ops = { ++ .map_alloc_check = array_map_alloc_check, + .map_alloc = array_map_alloc, + .map_free = array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = array_map_lookup_elem, + .map_update_elem = array_map_update_elem, + .map_delete_elem = array_map_delete_elem, ++ .map_gen_lookup = array_map_gen_lookup, ++ .map_direct_value_addr = array_map_direct_value_addr, ++ .map_direct_value_meta = array_map_direct_value_meta, ++ .map_seq_show_elem = array_map_seq_show_elem, ++ .map_check_btf = array_map_check_btf, + }; + +-static struct bpf_map_type_list array_type __read_mostly = { +- .ops = &array_ops, +- .type = BPF_MAP_TYPE_ARRAY, ++const struct bpf_map_ops percpu_array_map_ops = { ++ .map_alloc_check = array_map_alloc_check, ++ .map_alloc = array_map_alloc, ++ .map_free = array_map_free, ++ .map_get_next_key = array_map_get_next_key, ++ .map_lookup_elem = percpu_array_map_lookup_elem, ++ .map_update_elem = array_map_update_elem, ++ .map_delete_elem = array_map_delete_elem, ++ .map_seq_show_elem = percpu_array_map_seq_show_elem, ++ .map_check_btf = array_map_check_btf, + }; + +-static int __init register_array_map(void) +-{ +- bpf_register_map_type(&array_type); +- return 0; +-} +-late_initcall(register_array_map); +- +-static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) ++static int fd_array_map_alloc_check(union bpf_attr *attr) + { + /* only file descriptors can be stored in this type of map */ + if (attr->value_size != sizeof(u32)) +- return ERR_PTR(-EINVAL); +- return array_map_alloc(attr); ++ return -EINVAL; ++ /* Program read-only/write-only not supported for special maps yet. 
*/ ++ if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) ++ return -EINVAL; ++ return array_map_alloc_check(attr); + } + + static void fd_array_map_free(struct bpf_map *map) +@@ -192,17 +492,38 @@ static void fd_array_map_free(struct bpf + /* make sure it's empty */ + for (i = 0; i < array->map.max_entries; i++) + BUG_ON(array->ptrs[i] != NULL); +- kvfree(array); ++ ++ bpf_map_area_free(array); + } + + static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) + { +- return NULL; ++ return ERR_PTR(-EOPNOTSUPP); ++} ++ ++/* only called from syscall */ ++int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) ++{ ++ void **elem, *ptr; ++ int ret = 0; ++ ++ if (!map->ops->map_fd_sys_lookup_elem) ++ return -ENOTSUPP; ++ ++ rcu_read_lock(); ++ elem = array_map_lookup_elem(map, key); ++ if (elem && (ptr = READ_ONCE(*elem))) ++ *value = map->ops->map_fd_sys_lookup_elem(ptr); ++ else ++ ret = -ENOENT; ++ rcu_read_unlock(); ++ ++ return ret; + } + + /* only called from syscall */ +-static int fd_array_map_update_elem(struct bpf_map *map, void *key, +- void *value, u64 map_flags) ++int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, ++ void *key, void *value, u64 map_flags) + { + struct bpf_array *array = container_of(map, struct bpf_array, map); + void *new_ptr, *old_ptr; +@@ -215,7 +536,7 @@ static int fd_array_map_update_elem(stru + return -E2BIG; + + ufd = *(u32 *)value; +- new_ptr = map->ops->map_fd_get_ptr(map, ufd); ++ new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); + if (IS_ERR(new_ptr)) + return PTR_ERR(new_ptr); + +@@ -244,10 +565,12 @@ static int fd_array_map_delete_elem(stru + } + } + +-static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) ++static void *prog_fd_array_get_ptr(struct bpf_map *map, ++ struct file *map_file, int fd) + { + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog = bpf_prog_get(fd); ++ + if (IS_ERR(prog)) + return prog; + +@@ -255,18 +578,22 @@ static void *prog_fd_array_get_ptr(struc + bpf_prog_put(prog); + return ERR_PTR(-EINVAL); + } ++ + return prog; + } + + static void prog_fd_array_put_ptr(void *ptr) + { +- struct bpf_prog *prog = ptr; ++ bpf_prog_put(ptr); ++} + +- bpf_prog_put_rcu(prog); ++static u32 prog_fd_array_sys_lookup_elem(void *ptr) ++{ ++ return ((struct bpf_prog *)ptr)->aux->id; + } + + /* decrement refcnt of all bpf_progs that are stored in this map */ +-void bpf_fd_array_map_clear(struct bpf_map *map) ++static void bpf_fd_array_map_clear(struct bpf_map *map) + { + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; +@@ -275,91 +602,208 @@ void bpf_fd_array_map_clear(struct bpf_m + fd_array_map_delete_elem(map, &i); + } + +-static const struct bpf_map_ops prog_array_ops = { +- .map_alloc = fd_array_map_alloc, ++static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key, ++ struct seq_file *m) ++{ ++ void **elem, *ptr; ++ u32 prog_id; ++ ++ rcu_read_lock(); ++ ++ elem = array_map_lookup_elem(map, key); ++ if (elem) { ++ ptr = READ_ONCE(*elem); ++ if (ptr) { ++ seq_printf(m, "%u: ", *(u32 *)key); ++ prog_id = prog_fd_array_sys_lookup_elem(ptr); ++ btf_type_seq_show(map->btf, map->btf_value_type_id, ++ &prog_id, m); ++ seq_puts(m, "\n"); ++ } ++ } ++ ++ rcu_read_unlock(); ++} ++ ++const struct bpf_map_ops prog_array_map_ops = { ++ .map_alloc_check = fd_array_map_alloc_check, ++ .map_alloc = array_map_alloc, + .map_free = fd_array_map_free, + .map_get_next_key = array_map_get_next_key, + 
.map_lookup_elem = fd_array_map_lookup_elem, +- .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = prog_fd_array_get_ptr, + .map_fd_put_ptr = prog_fd_array_put_ptr, ++ .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, ++ .map_release_uref = bpf_fd_array_map_clear, ++ .map_seq_show_elem = prog_array_map_seq_show_elem, + }; + +-static struct bpf_map_type_list prog_array_type __read_mostly = { +- .ops = &prog_array_ops, +- .type = BPF_MAP_TYPE_PROG_ARRAY, +-}; ++static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, ++ struct file *map_file) ++{ ++ struct bpf_event_entry *ee; ++ ++ ee = kzalloc(sizeof(*ee), GFP_ATOMIC); ++ if (ee) { ++ ee->event = perf_file->private_data; ++ ee->perf_file = perf_file; ++ ee->map_file = map_file; ++ } + +-static int __init register_prog_array_map(void) ++ return ee; ++} ++ ++static void __bpf_event_entry_free(struct rcu_head *rcu) + { +- bpf_register_map_type(&prog_array_type); +- return 0; ++ struct bpf_event_entry *ee; ++ ++ ee = container_of(rcu, struct bpf_event_entry, rcu); ++ fput(ee->perf_file); ++ kfree(ee); + } +-late_initcall(register_prog_array_map); + +-static void perf_event_array_map_free(struct bpf_map *map) ++static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee) + { +- bpf_fd_array_map_clear(map); +- fd_array_map_free(map); ++ call_rcu(&ee->rcu, __bpf_event_entry_free); + } + +-static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) ++static void *perf_event_fd_array_get_ptr(struct bpf_map *map, ++ struct file *map_file, int fd) + { ++ struct bpf_event_entry *ee; + struct perf_event *event; +- const struct perf_event_attr *attr; ++ struct file *perf_file; ++ u64 value; + + event = perf_event_get(fd); + if (IS_ERR(event)) + return event; + +- attr = perf_event_attrs(event); +- if (IS_ERR(attr)) +- goto err; +- +- if (attr->inherit) +- goto err; ++ value = perf_event_read_local(event); + +- if (attr->type == PERF_TYPE_RAW) +- return event; +- +- if (attr->type == PERF_TYPE_HARDWARE) +- return event; ++ ee = bpf_event_entry_gen(perf_file, map_file); ++ if (ee) ++ return ee; + +- if (attr->type == PERF_TYPE_SOFTWARE && +- attr->config == PERF_COUNT_SW_BPF_OUTPUT) +- return event; +-err: +- perf_event_release_kernel(event); +- return ERR_PTR(-EINVAL); ++ ee = ERR_PTR(-ENOMEM); ++ return ee; + } + + static void perf_event_fd_array_put_ptr(void *ptr) + { +- struct perf_event *event = ptr; ++ bpf_event_entry_free_rcu(ptr); ++} + +- perf_event_release_kernel(event); ++static void perf_event_fd_array_release(struct bpf_map *map, ++ struct file *map_file) ++{ ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ struct bpf_event_entry *ee; ++ int i; ++ ++ rcu_read_lock(); ++ for (i = 0; i < array->map.max_entries; i++) { ++ ee = READ_ONCE(array->ptrs[i]); ++ if (ee && ee->map_file == map_file) ++ fd_array_map_delete_elem(map, &i); ++ } ++ rcu_read_unlock(); + } + +-static const struct bpf_map_ops perf_event_array_ops = { +- .map_alloc = fd_array_map_alloc, +- .map_free = perf_event_array_map_free, ++const struct bpf_map_ops perf_event_array_map_ops = { ++ .map_alloc_check = fd_array_map_alloc_check, ++ .map_alloc = array_map_alloc, ++ .map_free = fd_array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = fd_array_map_lookup_elem, +- .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = perf_event_fd_array_get_ptr, + .map_fd_put_ptr = 
perf_event_fd_array_put_ptr, ++ .map_release = perf_event_fd_array_release, ++ .map_check_btf = map_check_no_btf, + }; + +-static struct bpf_map_type_list perf_event_array_type __read_mostly = { +- .ops = &perf_event_array_ops, +- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, +-}; ++static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) ++{ ++ struct bpf_map *map, *inner_map_meta; ++ ++ inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); ++ if (IS_ERR(inner_map_meta)) ++ return inner_map_meta; ++ ++ map = array_map_alloc(attr); ++ if (IS_ERR(map)) { ++ bpf_map_meta_free(inner_map_meta); ++ return map; ++ } ++ ++ map->inner_map_meta = inner_map_meta; + +-static int __init register_perf_event_array_map(void) ++ return map; ++} ++ ++static void array_of_map_free(struct bpf_map *map) + { +- bpf_register_map_type(&perf_event_array_type); +- return 0; ++ /* map->inner_map_meta is only accessed by syscall which ++ * is protected by fdget/fdput. ++ */ ++ bpf_map_meta_free(map->inner_map_meta); ++ bpf_fd_array_map_clear(map); ++ fd_array_map_free(map); + } +-late_initcall(register_perf_event_array_map); ++ ++static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ struct bpf_map **inner_map = array_map_lookup_elem(map, key); ++ ++ if (!inner_map) ++ return NULL; ++ ++ return READ_ONCE(*inner_map); ++} ++ ++static u32 array_of_map_gen_lookup(struct bpf_map *map, ++ struct bpf_insn *insn_buf) ++{ ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ u32 elem_size = round_up(map->value_size, 8); ++ struct bpf_insn *insn = insn_buf; ++ const int ret = BPF_REG_0; ++ const int map_ptr = BPF_REG_1; ++ const int index = BPF_REG_2; ++ ++ *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); ++ *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); ++ if (map->unpriv_array) { ++ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6); ++ *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); ++ } else { ++ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); ++ } ++ if (is_power_of_2(elem_size)) ++ *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); ++ else ++ *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); ++ *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); ++ *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); ++ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); ++ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); ++ *insn++ = BPF_MOV64_IMM(ret, 0); ++ ++ return insn - insn_buf; ++} ++ ++const struct bpf_map_ops array_of_maps_map_ops = { ++ .map_alloc_check = fd_array_map_alloc_check, ++ .map_alloc = array_of_map_alloc, ++ .map_free = array_of_map_free, ++ .map_get_next_key = array_map_get_next_key, ++ .map_lookup_elem = array_of_map_lookup_elem, ++ .map_delete_elem = fd_array_map_delete_elem, ++ .map_fd_get_ptr = bpf_map_fd_get_ptr, ++ .map_fd_put_ptr = bpf_map_fd_put_ptr, ++ .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, ++ .map_gen_lookup = array_of_map_gen_lookup, ++ .map_check_btf = map_check_no_btf, ++}; +--- /dev/null ++++ b/kernel/bpf/bpf_lru_list.c +@@ -0,0 +1,695 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* Copyright (c) 2016 Facebook ++ */ ++#include ++#include ++#include ++ ++#include "bpf_lru_list.h" ++ ++#define LOCAL_FREE_TARGET (128) ++#define LOCAL_NR_SCANS LOCAL_FREE_TARGET ++ ++#define PERCPU_FREE_TARGET (4) ++#define PERCPU_NR_SCANS PERCPU_FREE_TARGET ++ ++/* Helpers to get the local list index */ ++#define LOCAL_LIST_IDX(t) ((t) - BPF_LOCAL_LIST_T_OFFSET) ++#define LOCAL_FREE_LIST_IDX 
LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE) ++#define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING) ++#define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET) ++ ++static int get_next_cpu(int cpu) ++{ ++ cpu = cpumask_next(cpu, cpu_possible_mask); ++ if (cpu >= nr_cpu_ids) ++ cpu = cpumask_first(cpu_possible_mask); ++ return cpu; ++} ++ ++/* Local list helpers */ ++static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l) ++{ ++ return &loc_l->lists[LOCAL_FREE_LIST_IDX]; ++} ++ ++static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l) ++{ ++ return &loc_l->lists[LOCAL_PENDING_LIST_IDX]; ++} ++ ++/* bpf_lru_node helpers */ ++static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node) ++{ ++ return node->ref; ++} ++ ++static void bpf_lru_list_count_inc(struct bpf_lru_list *l, ++ enum bpf_lru_list_type type) ++{ ++ if (type < NR_BPF_LRU_LIST_COUNT) ++ l->counts[type]++; ++} ++ ++static void bpf_lru_list_count_dec(struct bpf_lru_list *l, ++ enum bpf_lru_list_type type) ++{ ++ if (type < NR_BPF_LRU_LIST_COUNT) ++ l->counts[type]--; ++} ++ ++static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l, ++ struct bpf_lru_node *node, ++ struct list_head *free_list, ++ enum bpf_lru_list_type tgt_free_type) ++{ ++ if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) ++ return; ++ ++ /* If the removing node is the next_inactive_rotation candidate, ++ * move the next_inactive_rotation pointer also. ++ */ ++ if (&node->list == l->next_inactive_rotation) ++ l->next_inactive_rotation = l->next_inactive_rotation->prev; ++ ++ bpf_lru_list_count_dec(l, node->type); ++ ++ node->type = tgt_free_type; ++ list_move(&node->list, free_list); ++} ++ ++/* Move nodes from local list to the LRU list */ ++static void __bpf_lru_node_move_in(struct bpf_lru_list *l, ++ struct bpf_lru_node *node, ++ enum bpf_lru_list_type tgt_type) ++{ ++ if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)) || ++ WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) ++ return; ++ ++ bpf_lru_list_count_inc(l, tgt_type); ++ node->type = tgt_type; ++ node->ref = 0; ++ list_move(&node->list, &l->lists[tgt_type]); ++} ++ ++/* Move nodes between or within active and inactive list (like ++ * active to inactive, inactive to active or tail of active back to ++ * the head of active). ++ */ ++static void __bpf_lru_node_move(struct bpf_lru_list *l, ++ struct bpf_lru_node *node, ++ enum bpf_lru_list_type tgt_type) ++{ ++ if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)) || ++ WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) ++ return; ++ ++ if (node->type != tgt_type) { ++ bpf_lru_list_count_dec(l, node->type); ++ bpf_lru_list_count_inc(l, tgt_type); ++ node->type = tgt_type; ++ } ++ node->ref = 0; ++ ++ /* If the moving node is the next_inactive_rotation candidate, ++ * move the next_inactive_rotation pointer also. ++ */ ++ if (&node->list == l->next_inactive_rotation) ++ l->next_inactive_rotation = l->next_inactive_rotation->prev; ++ ++ list_move(&node->list, &l->lists[tgt_type]); ++} ++ ++static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l) ++{ ++ return l->counts[BPF_LRU_LIST_T_INACTIVE] < ++ l->counts[BPF_LRU_LIST_T_ACTIVE]; ++} ++ ++/* Rotate the active list: ++ * 1. Start from tail ++ * 2. If the node has the ref bit set, it will be rotated ++ * back to the head of active list with the ref bit cleared. ++ * Give this node one more chance to survive in the active list. ++ * 3. If the ref bit is not set, move it to the head of the ++ * inactive list. ++ * 4. 
It will at most scan nr_scans nodes ++ */ ++static void __bpf_lru_list_rotate_active(struct bpf_lru *lru, ++ struct bpf_lru_list *l) ++{ ++ struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE]; ++ struct bpf_lru_node *node, *tmp_node, *first_node; ++ unsigned int i = 0; ++ ++ first_node = list_first_entry(active, struct bpf_lru_node, list); ++ list_for_each_entry_safe_reverse(node, tmp_node, active, list) { ++ if (bpf_lru_node_is_ref(node)) ++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); ++ else ++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); ++ ++ if (++i == lru->nr_scans || node == first_node) ++ break; ++ } ++} ++ ++/* Rotate the inactive list. It starts from the next_inactive_rotation ++ * 1. If the node has ref bit set, it will be moved to the head ++ * of active list with the ref bit cleared. ++ * 2. If the node does not have ref bit set, it will leave it ++ * at its current location (i.e. do nothing) so that it can ++ * be considered during the next inactive_shrink. ++ * 3. It will at most scan nr_scans nodes ++ */ ++static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru, ++ struct bpf_lru_list *l) ++{ ++ struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; ++ struct list_head *cur, *last, *next = inactive; ++ struct bpf_lru_node *node; ++ unsigned int i = 0; ++ ++ if (list_empty(inactive)) ++ return; ++ ++ last = l->next_inactive_rotation->next; ++ if (last == inactive) ++ last = last->next; ++ ++ cur = l->next_inactive_rotation; ++ while (i < lru->nr_scans) { ++ if (cur == inactive) { ++ cur = cur->prev; ++ continue; ++ } ++ ++ node = list_entry(cur, struct bpf_lru_node, list); ++ next = cur->prev; ++ if (bpf_lru_node_is_ref(node)) ++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); ++ if (cur == last) ++ break; ++ cur = next; ++ i++; ++ } ++ ++ l->next_inactive_rotation = next; ++} ++ ++/* Shrink the inactive list. It starts from the tail of the ++ * inactive list and only move the nodes without the ref bit ++ * set to the designated free list. ++ */ ++static unsigned int ++__bpf_lru_list_shrink_inactive(struct bpf_lru *lru, ++ struct bpf_lru_list *l, ++ unsigned int tgt_nshrink, ++ struct list_head *free_list, ++ enum bpf_lru_list_type tgt_free_type) ++{ ++ struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; ++ struct bpf_lru_node *node, *tmp_node; ++ unsigned int nshrinked = 0; ++ unsigned int i = 0; ++ ++ list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) { ++ if (bpf_lru_node_is_ref(node)) { ++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); ++ } else if (lru->del_from_htab(lru->del_arg, node)) { ++ __bpf_lru_node_move_to_free(l, node, free_list, ++ tgt_free_type); ++ if (++nshrinked == tgt_nshrink) ++ break; ++ } ++ ++ if (++i == lru->nr_scans) ++ break; ++ } ++ ++ return nshrinked; ++} ++ ++/* 1. Rotate the active list (if needed) ++ * 2. Always rotate the inactive list ++ */ ++static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l) ++{ ++ if (bpf_lru_list_inactive_low(l)) ++ __bpf_lru_list_rotate_active(lru, l); ++ ++ __bpf_lru_list_rotate_inactive(lru, l); ++} ++ ++/* Calls __bpf_lru_list_shrink_inactive() to shrink some ++ * ref-bit-cleared nodes and move them to the designated ++ * free list. ++ * ++ * If it cannot get a free node after calling ++ * __bpf_lru_list_shrink_inactive(). It will just remove ++ * one node from either inactive or active list without ++ * honoring the ref-bit. It prefers inactive list to active ++ * list in this situation. 
++ */ ++static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru, ++ struct bpf_lru_list *l, ++ unsigned int tgt_nshrink, ++ struct list_head *free_list, ++ enum bpf_lru_list_type tgt_free_type) ++ ++{ ++ struct bpf_lru_node *node, *tmp_node; ++ struct list_head *force_shrink_list; ++ unsigned int nshrinked; ++ ++ nshrinked = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink, ++ free_list, tgt_free_type); ++ if (nshrinked) ++ return nshrinked; ++ ++ /* Do a force shrink by ignoring the reference bit */ ++ if (!list_empty(&l->lists[BPF_LRU_LIST_T_INACTIVE])) ++ force_shrink_list = &l->lists[BPF_LRU_LIST_T_INACTIVE]; ++ else ++ force_shrink_list = &l->lists[BPF_LRU_LIST_T_ACTIVE]; ++ ++ list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list, ++ list) { ++ if (lru->del_from_htab(lru->del_arg, node)) { ++ __bpf_lru_node_move_to_free(l, node, free_list, ++ tgt_free_type); ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++/* Flush the nodes from the local pending list to the LRU list */ ++static void __local_list_flush(struct bpf_lru_list *l, ++ struct bpf_lru_locallist *loc_l) ++{ ++ struct bpf_lru_node *node, *tmp_node; ++ ++ list_for_each_entry_safe_reverse(node, tmp_node, ++ local_pending_list(loc_l), list) { ++ if (bpf_lru_node_is_ref(node)) ++ __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE); ++ else ++ __bpf_lru_node_move_in(l, node, ++ BPF_LRU_LIST_T_INACTIVE); ++ } ++} ++ ++static void bpf_lru_list_push_free(struct bpf_lru_list *l, ++ struct bpf_lru_node *node) ++{ ++ unsigned long flags; ++ ++ if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) ++ return; ++ ++ raw_spin_lock_irqsave(&l->lock, flags); ++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); ++ raw_spin_unlock_irqrestore(&l->lock, flags); ++} ++ ++static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, ++ struct bpf_lru_locallist *loc_l) ++{ ++ struct bpf_lru_list *l = &lru->common_lru.lru_list; ++ struct bpf_lru_node *node, *tmp_node; ++ unsigned int nfree = 0; ++ ++ raw_spin_lock(&l->lock); ++ ++ __local_list_flush(l, loc_l); ++ ++ __bpf_lru_list_rotate(lru, l); ++ ++ list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE], ++ list) { ++ __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), ++ BPF_LRU_LOCAL_LIST_T_FREE); ++ if (++nfree == LOCAL_FREE_TARGET) ++ break; ++ } ++ ++ if (nfree < LOCAL_FREE_TARGET) ++ __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, ++ local_free_list(loc_l), ++ BPF_LRU_LOCAL_LIST_T_FREE); ++ ++ raw_spin_unlock(&l->lock); ++} ++ ++static void __local_list_add_pending(struct bpf_lru *lru, ++ struct bpf_lru_locallist *loc_l, ++ int cpu, ++ struct bpf_lru_node *node, ++ u32 hash) ++{ ++ *(u32 *)((void *)node + lru->hash_offset) = hash; ++ node->cpu = cpu; ++ node->type = BPF_LRU_LOCAL_LIST_T_PENDING; ++ node->ref = 0; ++ list_add(&node->list, local_pending_list(loc_l)); ++} ++ ++static struct bpf_lru_node * ++__local_list_pop_free(struct bpf_lru_locallist *loc_l) ++{ ++ struct bpf_lru_node *node; ++ ++ node = list_first_entry_or_null(local_free_list(loc_l), ++ struct bpf_lru_node, ++ list); ++ if (node) ++ list_del(&node->list); ++ ++ return node; ++} ++ ++static struct bpf_lru_node * ++__local_list_pop_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l) ++{ ++ struct bpf_lru_node *node; ++ bool force = false; ++ ++ignore_ref: ++ /* Get from the tail (i.e. older element) of the pending list. 
*/ ++ list_for_each_entry_reverse(node, local_pending_list(loc_l), ++ list) { ++ if ((!bpf_lru_node_is_ref(node) || force) && ++ lru->del_from_htab(lru->del_arg, node)) { ++ list_del(&node->list); ++ return node; ++ } ++ } ++ ++ if (!force) { ++ force = true; ++ goto ignore_ref; ++ } ++ ++ return NULL; ++} ++ ++static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru, ++ u32 hash) ++{ ++ struct list_head *free_list; ++ struct bpf_lru_node *node = NULL; ++ struct bpf_lru_list *l; ++ unsigned long flags; ++ int cpu = raw_smp_processor_id(); ++ ++ l = per_cpu_ptr(lru->percpu_lru, cpu); ++ ++ raw_spin_lock_irqsave(&l->lock, flags); ++ ++ __bpf_lru_list_rotate(lru, l); ++ ++ free_list = &l->lists[BPF_LRU_LIST_T_FREE]; ++ if (list_empty(free_list)) ++ __bpf_lru_list_shrink(lru, l, PERCPU_FREE_TARGET, free_list, ++ BPF_LRU_LIST_T_FREE); ++ ++ if (!list_empty(free_list)) { ++ node = list_first_entry(free_list, struct bpf_lru_node, list); ++ *(u32 *)((void *)node + lru->hash_offset) = hash; ++ node->ref = 0; ++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); ++ } ++ ++ raw_spin_unlock_irqrestore(&l->lock, flags); ++ ++ return node; ++} ++ ++static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru, ++ u32 hash) ++{ ++ struct bpf_lru_locallist *loc_l, *steal_loc_l; ++ struct bpf_common_lru *clru = &lru->common_lru; ++ struct bpf_lru_node *node; ++ int steal, first_steal; ++ unsigned long flags; ++ int cpu = raw_smp_processor_id(); ++ ++ loc_l = per_cpu_ptr(clru->local_list, cpu); ++ ++ raw_spin_lock_irqsave(&loc_l->lock, flags); ++ ++ node = __local_list_pop_free(loc_l); ++ if (!node) { ++ bpf_lru_list_pop_free_to_local(lru, loc_l); ++ node = __local_list_pop_free(loc_l); ++ } ++ ++ if (node) ++ __local_list_add_pending(lru, loc_l, cpu, node, hash); ++ ++ raw_spin_unlock_irqrestore(&loc_l->lock, flags); ++ ++ if (node) ++ return node; ++ ++ /* No free nodes found from the local free list and ++ * the global LRU list. ++ * ++ * Steal from the local free/pending list of the ++ * current CPU and remote CPU in RR. It starts ++ * with the loc_l->next_steal CPU. 
++ */ ++ ++ first_steal = loc_l->next_steal; ++ steal = first_steal; ++ do { ++ steal_loc_l = per_cpu_ptr(clru->local_list, steal); ++ ++ raw_spin_lock_irqsave(&steal_loc_l->lock, flags); ++ ++ node = __local_list_pop_free(steal_loc_l); ++ if (!node) ++ node = __local_list_pop_pending(lru, steal_loc_l); ++ ++ raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags); ++ ++ steal = get_next_cpu(steal); ++ } while (!node && steal != first_steal); ++ ++ loc_l->next_steal = steal; ++ ++ if (node) { ++ raw_spin_lock_irqsave(&loc_l->lock, flags); ++ __local_list_add_pending(lru, loc_l, cpu, node, hash); ++ raw_spin_unlock_irqrestore(&loc_l->lock, flags); ++ } ++ ++ return node; ++} ++ ++struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash) ++{ ++ if (lru->percpu) ++ return bpf_percpu_lru_pop_free(lru, hash); ++ else ++ return bpf_common_lru_pop_free(lru, hash); ++} ++ ++static void bpf_common_lru_push_free(struct bpf_lru *lru, ++ struct bpf_lru_node *node) ++{ ++ u8 node_type = READ_ONCE(node->type); ++ unsigned long flags; ++ ++ if (WARN_ON_ONCE(node_type == BPF_LRU_LIST_T_FREE) || ++ WARN_ON_ONCE(node_type == BPF_LRU_LOCAL_LIST_T_FREE)) ++ return; ++ ++ if (node_type == BPF_LRU_LOCAL_LIST_T_PENDING) { ++ struct bpf_lru_locallist *loc_l; ++ ++ loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu); ++ ++ raw_spin_lock_irqsave(&loc_l->lock, flags); ++ ++ if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) { ++ raw_spin_unlock_irqrestore(&loc_l->lock, flags); ++ goto check_lru_list; ++ } ++ ++ node->type = BPF_LRU_LOCAL_LIST_T_FREE; ++ node->ref = 0; ++ list_move(&node->list, local_free_list(loc_l)); ++ ++ raw_spin_unlock_irqrestore(&loc_l->lock, flags); ++ return; ++ } ++ ++check_lru_list: ++ bpf_lru_list_push_free(&lru->common_lru.lru_list, node); ++} ++ ++static void bpf_percpu_lru_push_free(struct bpf_lru *lru, ++ struct bpf_lru_node *node) ++{ ++ struct bpf_lru_list *l; ++ unsigned long flags; ++ ++ l = per_cpu_ptr(lru->percpu_lru, node->cpu); ++ ++ raw_spin_lock_irqsave(&l->lock, flags); ++ ++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); ++ ++ raw_spin_unlock_irqrestore(&l->lock, flags); ++} ++ ++void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) ++{ ++ if (lru->percpu) ++ bpf_percpu_lru_push_free(lru, node); ++ else ++ bpf_common_lru_push_free(lru, node); ++} ++ ++static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, ++ u32 node_offset, u32 elem_size, ++ u32 nr_elems) ++{ ++ struct bpf_lru_list *l = &lru->common_lru.lru_list; ++ u32 i; ++ ++ for (i = 0; i < nr_elems; i++) { ++ struct bpf_lru_node *node; ++ ++ node = (struct bpf_lru_node *)(buf + node_offset); ++ node->type = BPF_LRU_LIST_T_FREE; ++ node->ref = 0; ++ list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); ++ buf += elem_size; ++ } ++} ++ ++static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, ++ u32 node_offset, u32 elem_size, ++ u32 nr_elems) ++{ ++ u32 i, pcpu_entries; ++ int cpu; ++ struct bpf_lru_list *l; ++ ++ pcpu_entries = nr_elems / num_possible_cpus(); ++ ++ i = 0; ++ ++ for_each_possible_cpu(cpu) { ++ struct bpf_lru_node *node; ++ ++ l = per_cpu_ptr(lru->percpu_lru, cpu); ++again: ++ node = (struct bpf_lru_node *)(buf + node_offset); ++ node->cpu = cpu; ++ node->type = BPF_LRU_LIST_T_FREE; ++ node->ref = 0; ++ list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); ++ i++; ++ buf += elem_size; ++ if (i == nr_elems) ++ break; ++ if (i % pcpu_entries) ++ goto again; ++ } ++} ++ ++void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 
node_offset, ++ u32 elem_size, u32 nr_elems) ++{ ++ if (lru->percpu) ++ bpf_percpu_lru_populate(lru, buf, node_offset, elem_size, ++ nr_elems); ++ else ++ bpf_common_lru_populate(lru, buf, node_offset, elem_size, ++ nr_elems); ++} ++ ++static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu) ++{ ++ int i; ++ ++ for (i = 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++) ++ INIT_LIST_HEAD(&loc_l->lists[i]); ++ ++ loc_l->next_steal = cpu; ++ ++ raw_spin_lock_init(&loc_l->lock); ++} ++ ++static void bpf_lru_list_init(struct bpf_lru_list *l) ++{ ++ int i; ++ ++ for (i = 0; i < NR_BPF_LRU_LIST_T; i++) ++ INIT_LIST_HEAD(&l->lists[i]); ++ ++ for (i = 0; i < NR_BPF_LRU_LIST_COUNT; i++) ++ l->counts[i] = 0; ++ ++ l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE]; ++ ++ raw_spin_lock_init(&l->lock); ++} ++ ++int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, ++ del_from_htab_func del_from_htab, void *del_arg) ++{ ++ int cpu; ++ ++ if (percpu) { ++ lru->percpu_lru = alloc_percpu(struct bpf_lru_list); ++ if (!lru->percpu_lru) ++ return -ENOMEM; ++ ++ for_each_possible_cpu(cpu) { ++ struct bpf_lru_list *l; ++ ++ l = per_cpu_ptr(lru->percpu_lru, cpu); ++ bpf_lru_list_init(l); ++ } ++ lru->nr_scans = PERCPU_NR_SCANS; ++ } else { ++ struct bpf_common_lru *clru = &lru->common_lru; ++ ++ clru->local_list = alloc_percpu(struct bpf_lru_locallist); ++ if (!clru->local_list) ++ return -ENOMEM; ++ ++ for_each_possible_cpu(cpu) { ++ struct bpf_lru_locallist *loc_l; ++ ++ loc_l = per_cpu_ptr(clru->local_list, cpu); ++ bpf_lru_locallist_init(loc_l, cpu); ++ } ++ ++ bpf_lru_list_init(&clru->lru_list); ++ lru->nr_scans = LOCAL_NR_SCANS; ++ } ++ ++ lru->percpu = percpu; ++ lru->del_from_htab = del_from_htab; ++ lru->del_arg = del_arg; ++ lru->hash_offset = hash_offset; ++ ++ return 0; ++} ++ ++void bpf_lru_destroy(struct bpf_lru *lru) ++{ ++ if (lru->percpu) ++ free_percpu(lru->percpu_lru); ++ else ++ free_percpu(lru->common_lru.local_list); ++} +--- /dev/null ++++ b/kernel/bpf/bpf_lru_list.h +@@ -0,0 +1,82 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Copyright (c) 2016 Facebook ++ */ ++#ifndef __BPF_LRU_LIST_H_ ++#define __BPF_LRU_LIST_H_ ++ ++#include ++#include ++ ++#define NR_BPF_LRU_LIST_T (3) ++#define NR_BPF_LRU_LIST_COUNT (2) ++#define NR_BPF_LRU_LOCAL_LIST_T (2) ++#define BPF_LOCAL_LIST_T_OFFSET NR_BPF_LRU_LIST_T ++ ++enum bpf_lru_list_type { ++ BPF_LRU_LIST_T_ACTIVE, ++ BPF_LRU_LIST_T_INACTIVE, ++ BPF_LRU_LIST_T_FREE, ++ BPF_LRU_LOCAL_LIST_T_FREE, ++ BPF_LRU_LOCAL_LIST_T_PENDING, ++}; ++ ++struct bpf_lru_node { ++ struct list_head list; ++ u16 cpu; ++ u8 type; ++ u8 ref; ++}; ++ ++struct bpf_lru_list { ++ struct list_head lists[NR_BPF_LRU_LIST_T]; ++ unsigned int counts[NR_BPF_LRU_LIST_COUNT]; ++ /* The next inacitve list rotation starts from here */ ++ struct list_head *next_inactive_rotation; ++ ++ raw_spinlock_t lock ____cacheline_aligned_in_smp; ++}; ++ ++struct bpf_lru_locallist { ++ struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T]; ++ u16 next_steal; ++ raw_spinlock_t lock; ++}; ++ ++struct bpf_common_lru { ++ struct bpf_lru_list lru_list; ++ struct bpf_lru_locallist __percpu *local_list; ++}; ++ ++typedef bool (*del_from_htab_func)(void *arg, struct bpf_lru_node *node); ++ ++struct bpf_lru { ++ union { ++ struct bpf_common_lru common_lru; ++ struct bpf_lru_list __percpu *percpu_lru; ++ }; ++ del_from_htab_func del_from_htab; ++ void *del_arg; ++ unsigned int hash_offset; ++ unsigned int nr_scans; ++ bool percpu; ++}; ++ ++static inline void 
bpf_lru_node_set_ref(struct bpf_lru_node *node) ++{ ++ /* ref is an approximation on access frequency. It does not ++ * have to be very accurate. Hence, no protection is used. ++ */ ++ if (!node->ref) ++ node->ref = 1; ++} ++ ++int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, ++ del_from_htab_func del_from_htab, void *delete_arg); ++void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, ++ u32 elem_size, u32 nr_elems); ++void bpf_lru_destroy(struct bpf_lru *lru); ++struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash); ++void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node); ++void bpf_lru_promote(struct bpf_lru *lru, struct bpf_lru_node *node); ++ ++#endif +--- /dev/null ++++ b/kernel/bpf/btf.c +@@ -0,0 +1,3514 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright (c) 2018 Facebook */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* BTF (BPF Type Format) is the meta data format which describes ++ * the data types of BPF program/map. Hence, it basically focus ++ * on the C programming language which the modern BPF is primary ++ * using. ++ * ++ * ELF Section: ++ * ~~~~~~~~~~~ ++ * The BTF data is stored under the ".BTF" ELF section ++ * ++ * struct btf_type: ++ * ~~~~~~~~~~~~~~~ ++ * Each 'struct btf_type' object describes a C data type. ++ * Depending on the type it is describing, a 'struct btf_type' ++ * object may be followed by more data. F.e. ++ * To describe an array, 'struct btf_type' is followed by ++ * 'struct btf_array'. ++ * ++ * 'struct btf_type' and any extra data following it are ++ * 4 bytes aligned. ++ * ++ * Type section: ++ * ~~~~~~~~~~~~~ ++ * The BTF type section contains a list of 'struct btf_type' objects. ++ * Each one describes a C type. Recall from the above section ++ * that a 'struct btf_type' object could be immediately followed by extra ++ * data in order to desribe some particular C types. ++ * ++ * type_id: ++ * ~~~~~~~ ++ * Each btf_type object is identified by a type_id. The type_id ++ * is implicitly implied by the location of the btf_type object in ++ * the BTF type section. The first one has type_id 1. The second ++ * one has type_id 2...etc. Hence, an earlier btf_type has ++ * a smaller type_id. ++ * ++ * A btf_type object may refer to another btf_type object by using ++ * type_id (i.e. the "type" in the "struct btf_type"). ++ * ++ * NOTE that we cannot assume any reference-order. ++ * A btf_type object can refer to an earlier btf_type object ++ * but it can also refer to a later btf_type object. ++ * ++ * For example, to describe "const void *". A btf_type ++ * object describing "const" may refer to another btf_type ++ * object describing "void *". This type-reference is done ++ * by specifying type_id: ++ * ++ * [1] CONST (anon) type_id=2 ++ * [2] PTR (anon) type_id=0 ++ * ++ * The above is the btf_verifier debug log: ++ * - Each line started with "[?]" is a btf_type object ++ * - [?] is the type_id of the btf_type object. ++ * - CONST/PTR is the BTF_KIND_XXX ++ * - "(anon)" is the name of the type. It just ++ * happens that CONST and PTR has no name. ++ * - type_id=XXX is the 'u32 type' in btf_type ++ * ++ * NOTE: "void" has type_id 0 ++ * ++ * String section: ++ * ~~~~~~~~~~~~~~ ++ * The BTF string section contains the names used by the type section. ++ * Each string is referred by an "offset" from the beginning of the ++ * string section. 
++ * ++ * Each string is '\0' terminated. ++ * ++ * The first character in the string section must be '\0' ++ * which is used to mean 'anonymous'. Some btf_type may not ++ * have a name. ++ */ ++ ++/* BTF verification: ++ * ++ * To verify BTF data, two passes are needed. ++ * ++ * Pass #1 ++ * ~~~~~~~ ++ * The first pass is to collect all btf_type objects to ++ * an array: "btf->types". ++ * ++ * Depending on the C type that a btf_type is describing, ++ * a btf_type may be followed by extra data. We don't know ++ * how many btf_type is there, and more importantly we don't ++ * know where each btf_type is located in the type section. ++ * ++ * Without knowing the location of each type_id, most verifications ++ * cannot be done. e.g. an earlier btf_type may refer to a later ++ * btf_type (recall the "const void *" above), so we cannot ++ * check this type-reference in the first pass. ++ * ++ * In the first pass, it still does some verifications (e.g. ++ * checking the name is a valid offset to the string section). ++ * ++ * Pass #2 ++ * ~~~~~~~ ++ * The main focus is to resolve a btf_type that is referring ++ * to another type. ++ * ++ * We have to ensure the referring type: ++ * 1) does exist in the BTF (i.e. in btf->types[]) ++ * 2) does not cause a loop: ++ * struct A { ++ * struct B b; ++ * }; ++ * ++ * struct B { ++ * struct A a; ++ * }; ++ * ++ * btf_type_needs_resolve() decides if a btf_type needs ++ * to be resolved. ++ * ++ * The needs_resolve type implements the "resolve()" ops which ++ * essentially does a DFS and detects backedge. ++ * ++ * During resolve (or DFS), different C types have different ++ * "RESOLVED" conditions. ++ * ++ * When resolving a BTF_KIND_STRUCT, we need to resolve all its ++ * members because a member is always referring to another ++ * type. A struct's member can be treated as "RESOLVED" if ++ * it is referring to a BTF_KIND_PTR. Otherwise, the ++ * following valid C struct would be rejected: ++ * ++ * struct A { ++ * int m; ++ * struct A *a; ++ * }; ++ * ++ * When resolving a BTF_KIND_PTR, it needs to keep resolving if ++ * it is referring to another BTF_KIND_PTR. Otherwise, we cannot ++ * detect a pointer loop, e.g.: ++ * BTF_KIND_CONST -> BTF_KIND_PTR -> BTF_KIND_CONST -> BTF_KIND_PTR + ++ * ^ | ++ * +-----------------------------------------+ ++ * ++ */ ++ ++#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2) ++#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1) ++#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK) ++#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3) ++#define BITS_ROUNDUP_BYTES(bits) \ ++ (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits)) ++ ++#define BTF_INFO_MASK 0x8f00ffff ++#define BTF_INT_MASK 0x0fffffff ++#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE) ++#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET) ++ ++/* 16MB for 64k structs and each has 16 members and ++ * a few MB spaces for the string section. ++ * The hard limit is S32_MAX. 
++ */ ++#define BTF_MAX_SIZE (16 * 1024 * 1024) ++ ++#define for_each_member(i, struct_type, member) \ ++ for (i = 0, member = btf_type_member(struct_type); \ ++ i < btf_type_vlen(struct_type); \ ++ i++, member++) ++ ++#define for_each_member_from(i, from, struct_type, member) \ ++ for (i = from, member = btf_type_member(struct_type) + from; \ ++ i < btf_type_vlen(struct_type); \ ++ i++, member++) ++ ++#define for_each_vsi(i, struct_type, member) \ ++ for (i = 0, member = btf_type_var_secinfo(struct_type); \ ++ i < btf_type_vlen(struct_type); \ ++ i++, member++) ++ ++#define for_each_vsi_from(i, from, struct_type, member) \ ++ for (i = from, member = btf_type_var_secinfo(struct_type) + from; \ ++ i < btf_type_vlen(struct_type); \ ++ i++, member++) ++ ++DEFINE_IDR(btf_idr); ++DEFINE_SPINLOCK(btf_idr_lock); ++ ++struct btf { ++ void *data; ++ struct btf_type **types; ++ u32 *resolved_ids; ++ u32 *resolved_sizes; ++ const char *strings; ++ void *nohdr_data; ++ struct btf_header hdr; ++ u32 nr_types; ++ u32 types_size; ++ u32 data_size; ++ refcount_t refcnt; ++ u32 id; ++ struct rcu_head rcu; ++}; ++ ++enum verifier_phase { ++ CHECK_META, ++ CHECK_TYPE, ++}; ++ ++struct resolve_vertex { ++ const struct btf_type *t; ++ u32 type_id; ++ u16 next_member; ++}; ++ ++enum visit_state { ++ NOT_VISITED, ++ VISITED, ++ RESOLVED, ++}; ++ ++enum resolve_mode { ++ RESOLVE_TBD, /* To Be Determined */ ++ RESOLVE_PTR, /* Resolving for Pointer */ ++ RESOLVE_STRUCT_OR_ARRAY, /* Resolving for struct/union ++ * or array ++ */ ++}; ++ ++#define MAX_RESOLVE_DEPTH 32 ++ ++struct btf_sec_info { ++ u32 off; ++ u32 len; ++}; ++ ++struct btf_verifier_env { ++ struct btf *btf; ++ u8 *visit_states; ++ struct resolve_vertex stack[MAX_RESOLVE_DEPTH]; ++ struct bpf_verifier_log log; ++ u32 log_type_id; ++ u32 top_stack; ++ enum verifier_phase phase; ++ enum resolve_mode resolve_mode; ++}; ++ ++static const char * const btf_kind_str[NR_BTF_KINDS] = { ++ [BTF_KIND_UNKN] = "UNKNOWN", ++ [BTF_KIND_INT] = "INT", ++ [BTF_KIND_PTR] = "PTR", ++ [BTF_KIND_ARRAY] = "ARRAY", ++ [BTF_KIND_STRUCT] = "STRUCT", ++ [BTF_KIND_UNION] = "UNION", ++ [BTF_KIND_ENUM] = "ENUM", ++ [BTF_KIND_FWD] = "FWD", ++ [BTF_KIND_TYPEDEF] = "TYPEDEF", ++ [BTF_KIND_VOLATILE] = "VOLATILE", ++ [BTF_KIND_CONST] = "CONST", ++ [BTF_KIND_RESTRICT] = "RESTRICT", ++ [BTF_KIND_FUNC] = "FUNC", ++ [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", ++ [BTF_KIND_VAR] = "VAR", ++ [BTF_KIND_DATASEC] = "DATASEC", ++}; ++ ++struct btf_kind_operations { ++ s32 (*check_meta)(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left); ++ int (*resolve)(struct btf_verifier_env *env, ++ const struct resolve_vertex *v); ++ int (*check_member)(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type); ++ int (*check_kflag_member)(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type); ++ void (*log_details)(struct btf_verifier_env *env, ++ const struct btf_type *t); ++ void (*seq_show)(const struct btf *btf, const struct btf_type *t, ++ u32 type_id, void *data, u8 bits_offsets, ++ struct seq_file *m); ++}; ++ ++static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS]; ++static struct btf_type btf_void; ++ ++static int btf_resolve(struct btf_verifier_env *env, ++ const struct btf_type *t, u32 type_id); ++ ++static bool btf_type_is_modifier(const struct btf_type *t) ++{ ++ /* Some of them is not 
strictly a C modifier ++ * but they are grouped into the same bucket ++ * for BTF concern: ++ * A type (t) that refers to another ++ * type through t->type AND its size cannot ++ * be determined without following the t->type. ++ * ++ * ptr does not fall into this bucket ++ * because its size is always sizeof(void *). ++ */ ++ switch (BTF_INFO_KIND(t->info)) { ++ case BTF_KIND_TYPEDEF: ++ case BTF_KIND_VOLATILE: ++ case BTF_KIND_CONST: ++ case BTF_KIND_RESTRICT: ++ return true; ++ } ++ ++ return false; ++} ++ ++bool btf_type_is_void(const struct btf_type *t) ++{ ++ return t == &btf_void; ++} ++ ++static bool btf_type_is_fwd(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info) == BTF_KIND_FWD; ++} ++ ++static bool btf_type_is_func(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC; ++} ++ ++static bool btf_type_is_func_proto(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; ++} ++ ++static bool btf_type_nosize(const struct btf_type *t) ++{ ++ return btf_type_is_void(t) || btf_type_is_fwd(t) || ++ btf_type_is_func(t) || btf_type_is_func_proto(t); ++} ++ ++static bool btf_type_nosize_or_null(const struct btf_type *t) ++{ ++ return !t || btf_type_nosize(t); ++} ++ ++/* union is only a special case of struct: ++ * all its offsetof(member) == 0 ++ */ ++static bool btf_type_is_struct(const struct btf_type *t) ++{ ++ u8 kind = BTF_INFO_KIND(t->info); ++ ++ return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; ++} ++ ++static bool __btf_type_is_struct(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; ++} ++ ++static bool btf_type_is_array(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; ++} ++ ++static bool btf_type_is_ptr(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; ++} ++ ++static bool btf_type_is_int(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info) == BTF_KIND_INT; ++} ++ ++static bool btf_type_is_var(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; ++} ++ ++static bool btf_type_is_datasec(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; ++} ++ ++/* Types that act only as a source, not sink or intermediate ++ * type when resolving. ++ */ ++static bool btf_type_is_resolve_source_only(const struct btf_type *t) ++{ ++ return btf_type_is_var(t) || ++ btf_type_is_datasec(t); ++} ++ ++/* What types need to be resolved? ++ * ++ * btf_type_is_modifier() is an obvious one. ++ * ++ * btf_type_is_struct() because its member refers to ++ * another type (through member->type). ++ * ++ * btf_type_is_var() because the variable refers to ++ * another type. btf_type_is_datasec() holds multiple ++ * btf_type_is_var() types that need resolving. ++ * ++ * btf_type_is_array() because its element (array->type) ++ * refers to another type. Array can be thought of a ++ * special case of struct while array just has the same ++ * member-type repeated by array->nelems of times. 
++ */ ++static bool btf_type_needs_resolve(const struct btf_type *t) ++{ ++ return btf_type_is_modifier(t) || ++ btf_type_is_ptr(t) || ++ btf_type_is_struct(t) || ++ btf_type_is_array(t) || ++ btf_type_is_var(t) || ++ btf_type_is_datasec(t); ++} ++ ++/* t->size can be used */ ++static bool btf_type_has_size(const struct btf_type *t) ++{ ++ switch (BTF_INFO_KIND(t->info)) { ++ case BTF_KIND_INT: ++ case BTF_KIND_STRUCT: ++ case BTF_KIND_UNION: ++ case BTF_KIND_ENUM: ++ case BTF_KIND_DATASEC: ++ return true; ++ } ++ ++ return false; ++} ++ ++static const char *btf_int_encoding_str(u8 encoding) ++{ ++ if (encoding == 0) ++ return "(none)"; ++ else if (encoding == BTF_INT_SIGNED) ++ return "SIGNED"; ++ else if (encoding == BTF_INT_CHAR) ++ return "CHAR"; ++ else if (encoding == BTF_INT_BOOL) ++ return "BOOL"; ++ else ++ return "UNKN"; ++} ++ ++static u16 btf_type_vlen(const struct btf_type *t) ++{ ++ return BTF_INFO_VLEN(t->info); ++} ++ ++static bool btf_type_kflag(const struct btf_type *t) ++{ ++ return BTF_INFO_KFLAG(t->info); ++} ++ ++static u32 btf_member_bit_offset(const struct btf_type *struct_type, ++ const struct btf_member *member) ++{ ++ return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset) ++ : member->offset; ++} ++ ++static u32 btf_member_bitfield_size(const struct btf_type *struct_type, ++ const struct btf_member *member) ++{ ++ return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset) ++ : 0; ++} ++ ++static u32 btf_type_int(const struct btf_type *t) ++{ ++ return *(u32 *)(t + 1); ++} ++ ++static const struct btf_array *btf_type_array(const struct btf_type *t) ++{ ++ return (const struct btf_array *)(t + 1); ++} ++ ++static const struct btf_member *btf_type_member(const struct btf_type *t) ++{ ++ return (const struct btf_member *)(t + 1); ++} ++ ++static const struct btf_enum *btf_type_enum(const struct btf_type *t) ++{ ++ return (const struct btf_enum *)(t + 1); ++} ++ ++static const struct btf_var *btf_type_var(const struct btf_type *t) ++{ ++ return (const struct btf_var *)(t + 1); ++} ++ ++static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t) ++{ ++ return (const struct btf_var_secinfo *)(t + 1); ++} ++ ++static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) ++{ ++ return kind_ops[BTF_INFO_KIND(t->info)]; ++} ++ ++static bool btf_name_offset_valid(const struct btf *btf, u32 offset) ++{ ++ return BTF_STR_OFFSET_VALID(offset) && ++ offset < btf->hdr.str_len; ++} ++ ++static bool __btf_name_char_ok(char c, bool first, bool dot_ok) ++{ ++ if ((first ? !isalpha(c) : ++ !isalnum(c)) && ++ c != '_' && ++ ((c == '.' && !dot_ok) || ++ c != '.')) ++ return false; ++ return true; ++} ++ ++static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) ++{ ++ /* offset must be valid */ ++ const char *src = &btf->strings[offset]; ++ const char *src_limit; ++ ++ if (!__btf_name_char_ok(*src, true, dot_ok)) ++ return false; ++ ++ /* set a limit on identifier length */ ++ src_limit = src + KSYM_NAME_LEN; ++ src++; ++ while (*src && src < src_limit) { ++ if (!__btf_name_char_ok(*src, false, dot_ok)) ++ return false; ++ src++; ++ } ++ ++ return !*src; ++} ++ ++/* Only C-style identifier is permitted. This can be relaxed if ++ * necessary. 
++ */ ++static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) ++{ ++ return __btf_name_valid(btf, offset, false); ++} ++ ++static bool btf_name_valid_section(const struct btf *btf, u32 offset) ++{ ++ return __btf_name_valid(btf, offset, true); ++} ++ ++static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) ++{ ++ if (!offset) ++ return "(anon)"; ++ else if (offset < btf->hdr.str_len) ++ return &btf->strings[offset]; ++ else ++ return "(invalid-name-offset)"; ++} ++ ++const char *btf_name_by_offset(const struct btf *btf, u32 offset) ++{ ++ if (offset < btf->hdr.str_len) ++ return &btf->strings[offset]; ++ ++ return NULL; ++} ++ ++const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) ++{ ++ if (type_id > btf->nr_types) ++ return NULL; ++ ++ return btf->types[type_id]; ++} ++ ++/* ++ * Regular int is not a bit field and it must be either ++ * u8/u16/u32/u64 or __int128. ++ */ ++static bool btf_type_int_is_regular(const struct btf_type *t) ++{ ++ u8 nr_bits, nr_bytes; ++ u32 int_data; ++ ++ int_data = btf_type_int(t); ++ nr_bits = BTF_INT_BITS(int_data); ++ nr_bytes = BITS_ROUNDUP_BYTES(nr_bits); ++ if (BITS_PER_BYTE_MASKED(nr_bits) || ++ BTF_INT_OFFSET(int_data) || ++ (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && ++ nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) && ++ nr_bytes != (2 * sizeof(u64)))) { ++ return false; ++ } ++ ++ return true; ++} ++ ++/* ++ * Check that given struct member is a regular int with expected ++ * offset and size. ++ */ ++bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, ++ const struct btf_member *m, ++ u32 expected_offset, u32 expected_size) ++{ ++ const struct btf_type *t; ++ u32 id, int_data; ++ u8 nr_bits; ++ ++ id = m->type; ++ t = btf_type_id_size(btf, &id, NULL); ++ if (!t || !btf_type_is_int(t)) ++ return false; ++ ++ int_data = btf_type_int(t); ++ nr_bits = BTF_INT_BITS(int_data); ++ if (btf_type_kflag(s)) { ++ u32 bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset); ++ u32 bit_offset = BTF_MEMBER_BIT_OFFSET(m->offset); ++ ++ /* if kflag set, int should be a regular int and ++ * bit offset should be at byte boundary. ++ */ ++ return !bitfield_size && ++ BITS_ROUNDUP_BYTES(bit_offset) == expected_offset && ++ BITS_ROUNDUP_BYTES(nr_bits) == expected_size; ++ } ++ ++ if (BTF_INT_OFFSET(int_data) || ++ BITS_PER_BYTE_MASKED(m->offset) || ++ BITS_ROUNDUP_BYTES(m->offset) != expected_offset || ++ BITS_PER_BYTE_MASKED(nr_bits) || ++ BITS_ROUNDUP_BYTES(nr_bits) != expected_size) ++ return false; ++ ++ return true; ++} ++ ++__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log, ++ const char *fmt, ...) ++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ bpf_verifier_vlog(log, fmt, args); ++ va_end(args); ++} ++ ++__printf(2, 3) static void btf_verifier_log(struct btf_verifier_env *env, ++ const char *fmt, ...) ++{ ++ struct bpf_verifier_log *log = &env->log; ++ va_list args; ++ ++ if (!bpf_verifier_log_needed(log)) ++ return; ++ ++ va_start(args, fmt); ++ bpf_verifier_vlog(log, fmt, args); ++ va_end(args); ++} ++ ++__printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ bool log_details, ++ const char *fmt, ...) 
++{ ++ struct bpf_verifier_log *log = &env->log; ++ u8 kind = BTF_INFO_KIND(t->info); ++ struct btf *btf = env->btf; ++ va_list args; ++ ++ if (!bpf_verifier_log_needed(log)) ++ return; ++ ++ __btf_verifier_log(log, "[%u] %s %s%s", ++ env->log_type_id, ++ btf_kind_str[kind], ++ __btf_name_by_offset(btf, t->name_off), ++ log_details ? " " : ""); ++ ++ if (log_details) ++ btf_type_ops(t)->log_details(env, t); ++ ++ if (fmt && *fmt) { ++ __btf_verifier_log(log, " "); ++ va_start(args, fmt); ++ bpf_verifier_vlog(log, fmt, args); ++ va_end(args); ++ } ++ ++ __btf_verifier_log(log, "\n"); ++} ++ ++#define btf_verifier_log_type(env, t, ...) \ ++ __btf_verifier_log_type((env), (t), true, __VA_ARGS__) ++#define btf_verifier_log_basic(env, t, ...) \ ++ __btf_verifier_log_type((env), (t), false, __VA_ARGS__) ++ ++__printf(4, 5) ++static void btf_verifier_log_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const char *fmt, ...) ++{ ++ struct bpf_verifier_log *log = &env->log; ++ struct btf *btf = env->btf; ++ va_list args; ++ ++ if (!bpf_verifier_log_needed(log)) ++ return; ++ ++ /* The CHECK_META phase already did a btf dump. ++ * ++ * If member is logged again, it must hit an error in ++ * parsing this member. It is useful to print out which ++ * struct this member belongs to. ++ */ ++ if (env->phase != CHECK_META) ++ btf_verifier_log_type(env, struct_type, NULL); ++ ++ if (btf_type_kflag(struct_type)) ++ __btf_verifier_log(log, ++ "\t%s type_id=%u bitfield_size=%u bits_offset=%u", ++ __btf_name_by_offset(btf, member->name_off), ++ member->type, ++ BTF_MEMBER_BITFIELD_SIZE(member->offset), ++ BTF_MEMBER_BIT_OFFSET(member->offset)); ++ else ++ __btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u", ++ __btf_name_by_offset(btf, member->name_off), ++ member->type, member->offset); ++ ++ if (fmt && *fmt) { ++ __btf_verifier_log(log, " "); ++ va_start(args, fmt); ++ bpf_verifier_vlog(log, fmt, args); ++ va_end(args); ++ } ++ ++ __btf_verifier_log(log, "\n"); ++} ++ ++__printf(4, 5) ++static void btf_verifier_log_vsi(struct btf_verifier_env *env, ++ const struct btf_type *datasec_type, ++ const struct btf_var_secinfo *vsi, ++ const char *fmt, ...) 
++{ ++ struct bpf_verifier_log *log = &env->log; ++ va_list args; ++ ++ if (!bpf_verifier_log_needed(log)) ++ return; ++ if (env->phase != CHECK_META) ++ btf_verifier_log_type(env, datasec_type, NULL); ++ ++ __btf_verifier_log(log, "\t type_id=%u offset=%u size=%u", ++ vsi->type, vsi->offset, vsi->size); ++ if (fmt && *fmt) { ++ __btf_verifier_log(log, " "); ++ va_start(args, fmt); ++ bpf_verifier_vlog(log, fmt, args); ++ va_end(args); ++ } ++ ++ __btf_verifier_log(log, "\n"); ++} ++ ++static void btf_verifier_log_hdr(struct btf_verifier_env *env, ++ u32 btf_data_size) ++{ ++ struct bpf_verifier_log *log = &env->log; ++ const struct btf *btf = env->btf; ++ const struct btf_header *hdr; ++ ++ if (!bpf_verifier_log_needed(log)) ++ return; ++ ++ hdr = &btf->hdr; ++ __btf_verifier_log(log, "magic: 0x%x\n", hdr->magic); ++ __btf_verifier_log(log, "version: %u\n", hdr->version); ++ __btf_verifier_log(log, "flags: 0x%x\n", hdr->flags); ++ __btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len); ++ __btf_verifier_log(log, "type_off: %u\n", hdr->type_off); ++ __btf_verifier_log(log, "type_len: %u\n", hdr->type_len); ++ __btf_verifier_log(log, "str_off: %u\n", hdr->str_off); ++ __btf_verifier_log(log, "str_len: %u\n", hdr->str_len); ++ __btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size); ++} ++ ++static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) ++{ ++ struct btf *btf = env->btf; ++ ++ /* < 2 because +1 for btf_void which is always in btf->types[0]. ++ * btf_void is not accounted in btf->nr_types because btf_void ++ * does not come from the BTF file. ++ */ ++ if (btf->types_size - btf->nr_types < 2) { ++ /* Expand 'types' array */ ++ ++ struct btf_type **new_types; ++ u32 expand_by, new_size; ++ ++ if (btf->types_size == BTF_MAX_TYPE) { ++ btf_verifier_log(env, "Exceeded max num of types"); ++ return -E2BIG; ++ } ++ ++ expand_by = max_t(u32, btf->types_size >> 2, 16); ++ new_size = min_t(u32, BTF_MAX_TYPE, ++ btf->types_size + expand_by); ++ ++ new_types = kcalloc(new_size, sizeof(*new_types), ++ GFP_KERNEL | __GFP_NOWARN); ++ if (!new_types) ++ return -ENOMEM; ++ ++ if (btf->nr_types == 0) ++ new_types[0] = &btf_void; ++ else ++ memcpy(new_types, btf->types, ++ sizeof(*btf->types) * (btf->nr_types + 1)); ++ ++ kvfree(btf->types); ++ btf->types = new_types; ++ btf->types_size = new_size; ++ } ++ ++ btf->types[++(btf->nr_types)] = t; ++ ++ return 0; ++} ++ ++static int btf_alloc_id(struct btf *btf) ++{ ++ int id; ++ ++ idr_preload(GFP_KERNEL); ++ spin_lock_bh(&btf_idr_lock); ++ id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC); ++ if (id > 0) ++ btf->id = id; ++ spin_unlock_bh(&btf_idr_lock); ++ idr_preload_end(); ++ ++ if (WARN_ON_ONCE(!id)) ++ return -ENOSPC; ++ ++ return id > 0 ? 0 : id; ++} ++ ++static void btf_free_id(struct btf *btf) ++{ ++ unsigned long flags; ++ ++ /* ++ * In map-in-map, calling map_delete_elem() on outer ++ * map will call bpf_map_put on the inner map. ++ * It will then eventually call btf_free_id() ++ * on the inner map. Some of the map_delete_elem() ++ * implementation may have irq disabled, so ++ * we need to use the _irqsave() version instead ++ * of the _bh() version. 
++ */ ++ spin_lock_irqsave(&btf_idr_lock, flags); ++ idr_remove(&btf_idr, btf->id); ++ spin_unlock_irqrestore(&btf_idr_lock, flags); ++} ++ ++static void btf_free(struct btf *btf) ++{ ++ kvfree(btf->types); ++ kvfree(btf->resolved_sizes); ++ kvfree(btf->resolved_ids); ++ kvfree(btf->data); ++ kfree(btf); ++} ++ ++static void btf_free_rcu(struct rcu_head *rcu) ++{ ++ struct btf *btf = container_of(rcu, struct btf, rcu); ++ ++ btf_free(btf); ++} ++ ++void btf_put(struct btf *btf) ++{ ++ if (btf && refcount_dec_and_test(&btf->refcnt)) { ++ btf_free_id(btf); ++ call_rcu(&btf->rcu, btf_free_rcu); ++ } ++} ++ ++static int env_resolve_init(struct btf_verifier_env *env) ++{ ++ struct btf *btf = env->btf; ++ u32 nr_types = btf->nr_types; ++ u32 *resolved_sizes = NULL; ++ u32 *resolved_ids = NULL; ++ u8 *visit_states = NULL; ++ ++ /* +1 for btf_void */ ++ resolved_sizes = kcalloc(nr_types + 1, sizeof(*resolved_sizes), ++ GFP_KERNEL | __GFP_NOWARN); ++ if (!resolved_sizes) ++ goto nomem; ++ ++ resolved_ids = kcalloc(nr_types + 1, sizeof(*resolved_ids), ++ GFP_KERNEL | __GFP_NOWARN); ++ if (!resolved_ids) ++ goto nomem; ++ ++ visit_states = kcalloc(nr_types + 1, sizeof(*visit_states), ++ GFP_KERNEL | __GFP_NOWARN); ++ if (!visit_states) ++ goto nomem; ++ ++ btf->resolved_sizes = resolved_sizes; ++ btf->resolved_ids = resolved_ids; ++ env->visit_states = visit_states; ++ ++ return 0; ++ ++nomem: ++ kvfree(resolved_sizes); ++ kvfree(resolved_ids); ++ kvfree(visit_states); ++ return -ENOMEM; ++} ++ ++static void btf_verifier_env_free(struct btf_verifier_env *env) ++{ ++ kvfree(env->visit_states); ++ kfree(env); ++} ++ ++static bool env_type_is_resolve_sink(const struct btf_verifier_env *env, ++ const struct btf_type *next_type) ++{ ++ switch (env->resolve_mode) { ++ case RESOLVE_TBD: ++ /* int, enum or void is a sink */ ++ return !btf_type_needs_resolve(next_type); ++ case RESOLVE_PTR: ++ /* int, enum, void, struct, array, func or func_proto is a sink ++ * for ptr ++ */ ++ return !btf_type_is_modifier(next_type) && ++ !btf_type_is_ptr(next_type); ++ case RESOLVE_STRUCT_OR_ARRAY: ++ /* int, enum, void, ptr, func or func_proto is a sink ++ * for struct and array ++ */ ++ return !btf_type_is_modifier(next_type) && ++ !btf_type_is_array(next_type) && ++ !btf_type_is_struct(next_type); ++ default: ++ BUG(); ++ } ++} ++ ++static bool env_type_is_resolved(const struct btf_verifier_env *env, ++ u32 type_id) ++{ ++ return env->visit_states[type_id] == RESOLVED; ++} ++ ++static int env_stack_push(struct btf_verifier_env *env, ++ const struct btf_type *t, u32 type_id) ++{ ++ struct resolve_vertex *v; ++ ++ if (env->top_stack == MAX_RESOLVE_DEPTH) ++ return -E2BIG; ++ ++ if (env->visit_states[type_id] != NOT_VISITED) ++ return -EEXIST; ++ ++ env->visit_states[type_id] = VISITED; ++ ++ v = &env->stack[env->top_stack++]; ++ v->t = t; ++ v->type_id = type_id; ++ v->next_member = 0; ++ ++ if (env->resolve_mode == RESOLVE_TBD) { ++ if (btf_type_is_ptr(t)) ++ env->resolve_mode = RESOLVE_PTR; ++ else if (btf_type_is_struct(t) || btf_type_is_array(t)) ++ env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY; ++ } ++ ++ return 0; ++} ++ ++static void env_stack_set_next_member(struct btf_verifier_env *env, ++ u16 next_member) ++{ ++ env->stack[env->top_stack - 1].next_member = next_member; ++} ++ ++static void env_stack_pop_resolved(struct btf_verifier_env *env, ++ u32 resolved_type_id, ++ u32 resolved_size) ++{ ++ u32 type_id = env->stack[--(env->top_stack)].type_id; ++ struct btf *btf = env->btf; ++ ++ btf->resolved_sizes[type_id] = 
resolved_size; ++ btf->resolved_ids[type_id] = resolved_type_id; ++ env->visit_states[type_id] = RESOLVED; ++} ++ ++static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env) ++{ ++ return env->top_stack ? &env->stack[env->top_stack - 1] : NULL; ++} ++ ++/* The input param "type_id" must point to a needs_resolve type */ ++static const struct btf_type *btf_type_id_resolve(const struct btf *btf, ++ u32 *type_id) ++{ ++ *type_id = btf->resolved_ids[*type_id]; ++ return btf_type_by_id(btf, *type_id); ++} ++ ++const struct btf_type *btf_type_id_size(const struct btf *btf, ++ u32 *type_id, u32 *ret_size) ++{ ++ const struct btf_type *size_type; ++ u32 size_type_id = *type_id; ++ u32 size = 0; ++ ++ size_type = btf_type_by_id(btf, size_type_id); ++ if (btf_type_nosize_or_null(size_type)) ++ return NULL; ++ ++ if (btf_type_has_size(size_type)) { ++ size = size_type->size; ++ } else if (btf_type_is_array(size_type)) { ++ size = btf->resolved_sizes[size_type_id]; ++ } else if (btf_type_is_ptr(size_type)) { ++ size = sizeof(void *); ++ } else { ++ if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) && ++ !btf_type_is_var(size_type))) ++ return NULL; ++ ++ size_type_id = btf->resolved_ids[size_type_id]; ++ size_type = btf_type_by_id(btf, size_type_id); ++ if (btf_type_nosize_or_null(size_type)) ++ return NULL; ++ else if (btf_type_has_size(size_type)) ++ size = size_type->size; ++ else if (btf_type_is_array(size_type)) ++ size = btf->resolved_sizes[size_type_id]; ++ else if (btf_type_is_ptr(size_type)) ++ size = sizeof(void *); ++ else ++ return NULL; ++ } ++ ++ *type_id = size_type_id; ++ if (ret_size) ++ *ret_size = size; ++ ++ return size_type; ++} ++ ++static int btf_df_check_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ btf_verifier_log_basic(env, struct_type, ++ "Unsupported check_member"); ++ return -EINVAL; ++} ++ ++static int btf_df_check_kflag_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ btf_verifier_log_basic(env, struct_type, ++ "Unsupported check_kflag_member"); ++ return -EINVAL; ++} ++ ++/* Used for ptr, array and struct/union type members. ++ * int, enum and modifier types have their specific callback functions. ++ */ ++static int btf_generic_check_kflag_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ if (BTF_MEMBER_BITFIELD_SIZE(member->offset)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Invalid member bitfield_size"); ++ return -EINVAL; ++ } ++ ++ /* bitfield size is 0, so member->offset represents bit offset only. ++ * It is safe to call non kflag check_member variants. 
++ */ ++ return btf_type_ops(member_type)->check_member(env, struct_type, ++ member, ++ member_type); ++} ++ ++static int btf_df_resolve(struct btf_verifier_env *env, ++ const struct resolve_vertex *v) ++{ ++ btf_verifier_log_basic(env, v->t, "Unsupported resolve"); ++ return -EINVAL; ++} ++ ++static void btf_df_seq_show(const struct btf *btf, const struct btf_type *t, ++ u32 type_id, void *data, u8 bits_offsets, ++ struct seq_file *m) ++{ ++ seq_printf(m, "", BTF_INFO_KIND(t->info)); ++} ++ ++static int btf_int_check_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ u32 int_data = btf_type_int(member_type); ++ u32 struct_bits_off = member->offset; ++ u32 struct_size = struct_type->size; ++ u32 nr_copy_bits; ++ u32 bytes_offset; ++ ++ if (U32_MAX - struct_bits_off < BTF_INT_OFFSET(int_data)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "bits_offset exceeds U32_MAX"); ++ return -EINVAL; ++ } ++ ++ struct_bits_off += BTF_INT_OFFSET(int_data); ++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); ++ nr_copy_bits = BTF_INT_BITS(int_data) + ++ BITS_PER_BYTE_MASKED(struct_bits_off); ++ ++ if (nr_copy_bits > BITS_PER_U128) { ++ btf_verifier_log_member(env, struct_type, member, ++ "nr_copy_bits exceeds 128"); ++ return -EINVAL; ++ } ++ ++ if (struct_size < bytes_offset || ++ struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member exceeds struct_size"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int btf_int_check_kflag_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ u32 struct_bits_off, nr_bits, nr_int_data_bits, bytes_offset; ++ u32 int_data = btf_type_int(member_type); ++ u32 struct_size = struct_type->size; ++ u32 nr_copy_bits; ++ ++ /* a regular int type is required for the kflag int member */ ++ if (!btf_type_int_is_regular(member_type)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Invalid member base type"); ++ return -EINVAL; ++ } ++ ++ /* check sanity of bitfield size */ ++ nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset); ++ struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset); ++ nr_int_data_bits = BTF_INT_BITS(int_data); ++ if (!nr_bits) { ++ /* Not a bitfield member, member offset must be at byte ++ * boundary. 
++ */ ++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Invalid member offset"); ++ return -EINVAL; ++ } ++ ++ nr_bits = nr_int_data_bits; ++ } else if (nr_bits > nr_int_data_bits) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Invalid member bitfield_size"); ++ return -EINVAL; ++ } ++ ++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); ++ nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off); ++ if (nr_copy_bits > BITS_PER_U128) { ++ btf_verifier_log_member(env, struct_type, member, ++ "nr_copy_bits exceeds 128"); ++ return -EINVAL; ++ } ++ ++ if (struct_size < bytes_offset || ++ struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member exceeds struct_size"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static s32 btf_int_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ u32 int_data, nr_bits, meta_needed = sizeof(int_data); ++ u16 encoding; ++ ++ if (meta_left < meta_needed) { ++ btf_verifier_log_basic(env, t, ++ "meta_left:%u meta_needed:%u", ++ meta_left, meta_needed); ++ return -EINVAL; ++ } ++ ++ if (btf_type_vlen(t)) { ++ btf_verifier_log_type(env, t, "vlen != 0"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_kflag(t)) { ++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); ++ return -EINVAL; ++ } ++ ++ int_data = btf_type_int(t); ++ if (int_data & ~BTF_INT_MASK) { ++ btf_verifier_log_basic(env, t, "Invalid int_data:%x", ++ int_data); ++ return -EINVAL; ++ } ++ ++ nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data); ++ ++ if (nr_bits > BITS_PER_U128) { ++ btf_verifier_log_type(env, t, "nr_bits exceeds %zu", ++ BITS_PER_U128); ++ return -EINVAL; ++ } ++ ++ if (BITS_ROUNDUP_BYTES(nr_bits) > t->size) { ++ btf_verifier_log_type(env, t, "nr_bits exceeds type_size"); ++ return -EINVAL; ++ } ++ ++ /* ++ * Only one of the encoding bits is allowed and it ++ * should be sufficient for the pretty print purpose (i.e. decoding). ++ * Multiple bits can be allowed later if it is found ++ * to be insufficient. ++ */ ++ encoding = BTF_INT_ENCODING(int_data); ++ if (encoding && ++ encoding != BTF_INT_SIGNED && ++ encoding != BTF_INT_CHAR && ++ encoding != BTF_INT_BOOL) { ++ btf_verifier_log_type(env, t, "Unsupported encoding"); ++ return -ENOTSUPP; ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ return meta_needed; ++} ++ ++static void btf_int_log(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ int int_data = btf_type_int(t); ++ ++ btf_verifier_log(env, ++ "size=%u bits_offset=%u nr_bits=%u encoding=%s", ++ t->size, BTF_INT_OFFSET(int_data), ++ BTF_INT_BITS(int_data), ++ btf_int_encoding_str(BTF_INT_ENCODING(int_data))); ++} ++ ++static void btf_int128_print(struct seq_file *m, void *data) ++{ ++ /* data points to a __int128 number. 
++ * Suppose ++ * int128_num = *(__int128 *)data; ++ * The below formulas shows what upper_num and lower_num represents: ++ * upper_num = int128_num >> 64; ++ * lower_num = int128_num & 0xffffffffFFFFFFFFULL; ++ */ ++ u64 upper_num, lower_num; ++ ++#ifdef __BIG_ENDIAN_BITFIELD ++ upper_num = *(u64 *)data; ++ lower_num = *(u64 *)(data + 8); ++#else ++ upper_num = *(u64 *)(data + 8); ++ lower_num = *(u64 *)data; ++#endif ++ if (upper_num == 0) ++ seq_printf(m, "0x%llx", lower_num); ++ else ++ seq_printf(m, "0x%llx%016llx", upper_num, lower_num); ++} ++ ++static void btf_int128_shift(u64 *print_num, u16 left_shift_bits, ++ u16 right_shift_bits) ++{ ++ u64 upper_num, lower_num; ++ ++#ifdef __BIG_ENDIAN_BITFIELD ++ upper_num = print_num[0]; ++ lower_num = print_num[1]; ++#else ++ upper_num = print_num[1]; ++ lower_num = print_num[0]; ++#endif ++ ++ /* shake out un-needed bits by shift/or operations */ ++ if (left_shift_bits >= 64) { ++ upper_num = lower_num << (left_shift_bits - 64); ++ lower_num = 0; ++ } else { ++ upper_num = (upper_num << left_shift_bits) | ++ (lower_num >> (64 - left_shift_bits)); ++ lower_num = lower_num << left_shift_bits; ++ } ++ ++ if (right_shift_bits >= 64) { ++ lower_num = upper_num >> (right_shift_bits - 64); ++ upper_num = 0; ++ } else { ++ lower_num = (lower_num >> right_shift_bits) | ++ (upper_num << (64 - right_shift_bits)); ++ upper_num = upper_num >> right_shift_bits; ++ } ++ ++#ifdef __BIG_ENDIAN_BITFIELD ++ print_num[0] = upper_num; ++ print_num[1] = lower_num; ++#else ++ print_num[0] = lower_num; ++ print_num[1] = upper_num; ++#endif ++} ++ ++static void btf_bitfield_seq_show(void *data, u8 bits_offset, ++ u8 nr_bits, struct seq_file *m) ++{ ++ u16 left_shift_bits, right_shift_bits; ++ u8 nr_copy_bytes; ++ u8 nr_copy_bits; ++ u64 print_num[2] = {}; ++ ++ nr_copy_bits = nr_bits + bits_offset; ++ nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); ++ ++ memcpy(print_num, data, nr_copy_bytes); ++ ++#ifdef __BIG_ENDIAN_BITFIELD ++ left_shift_bits = bits_offset; ++#else ++ left_shift_bits = BITS_PER_U128 - nr_copy_bits; ++#endif ++ right_shift_bits = BITS_PER_U128 - nr_bits; ++ ++ btf_int128_shift(print_num, left_shift_bits, right_shift_bits); ++ btf_int128_print(m, print_num); ++} ++ ++ ++static void btf_int_bits_seq_show(const struct btf *btf, ++ const struct btf_type *t, ++ void *data, u8 bits_offset, ++ struct seq_file *m) ++{ ++ u32 int_data = btf_type_int(t); ++ u8 nr_bits = BTF_INT_BITS(int_data); ++ u8 total_bits_offset; ++ ++ /* ++ * bits_offset is at most 7. ++ * BTF_INT_OFFSET() cannot exceed 128 bits. 
++ */ ++ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); ++ data += BITS_ROUNDDOWN_BYTES(total_bits_offset); ++ bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); ++ btf_bitfield_seq_show(data, bits_offset, nr_bits, m); ++} ++ ++static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, ++ u32 type_id, void *data, u8 bits_offset, ++ struct seq_file *m) ++{ ++ u32 int_data = btf_type_int(t); ++ u8 encoding = BTF_INT_ENCODING(int_data); ++ bool sign = encoding & BTF_INT_SIGNED; ++ u8 nr_bits = BTF_INT_BITS(int_data); ++ ++ if (bits_offset || BTF_INT_OFFSET(int_data) || ++ BITS_PER_BYTE_MASKED(nr_bits)) { ++ btf_int_bits_seq_show(btf, t, data, bits_offset, m); ++ return; ++ } ++ ++ switch (nr_bits) { ++ case 128: ++ btf_int128_print(m, data); ++ break; ++ case 64: ++ if (sign) ++ seq_printf(m, "%lld", *(s64 *)data); ++ else ++ seq_printf(m, "%llu", *(u64 *)data); ++ break; ++ case 32: ++ if (sign) ++ seq_printf(m, "%d", *(s32 *)data); ++ else ++ seq_printf(m, "%u", *(u32 *)data); ++ break; ++ case 16: ++ if (sign) ++ seq_printf(m, "%d", *(s16 *)data); ++ else ++ seq_printf(m, "%u", *(u16 *)data); ++ break; ++ case 8: ++ if (sign) ++ seq_printf(m, "%d", *(s8 *)data); ++ else ++ seq_printf(m, "%u", *(u8 *)data); ++ break; ++ default: ++ btf_int_bits_seq_show(btf, t, data, bits_offset, m); ++ } ++} ++ ++static const struct btf_kind_operations int_ops = { ++ .check_meta = btf_int_check_meta, ++ .resolve = btf_df_resolve, ++ .check_member = btf_int_check_member, ++ .check_kflag_member = btf_int_check_kflag_member, ++ .log_details = btf_int_log, ++ .seq_show = btf_int_seq_show, ++}; ++ ++static int btf_modifier_check_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ const struct btf_type *resolved_type; ++ u32 resolved_type_id = member->type; ++ struct btf_member resolved_member; ++ struct btf *btf = env->btf; ++ ++ resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL); ++ if (!resolved_type) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Invalid member"); ++ return -EINVAL; ++ } ++ ++ resolved_member = *member; ++ resolved_member.type = resolved_type_id; ++ ++ return btf_type_ops(resolved_type)->check_member(env, struct_type, ++ &resolved_member, ++ resolved_type); ++} ++ ++static int btf_modifier_check_kflag_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ const struct btf_type *resolved_type; ++ u32 resolved_type_id = member->type; ++ struct btf_member resolved_member; ++ struct btf *btf = env->btf; ++ ++ resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL); ++ if (!resolved_type) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Invalid member"); ++ return -EINVAL; ++ } ++ ++ resolved_member = *member; ++ resolved_member.type = resolved_type_id; ++ ++ return btf_type_ops(resolved_type)->check_kflag_member(env, struct_type, ++ &resolved_member, ++ resolved_type); ++} ++ ++static int btf_ptr_check_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ u32 struct_size, struct_bits_off, bytes_offset; ++ ++ struct_size = struct_type->size; ++ struct_bits_off = member->offset; ++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); ++ ++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { ++ 
btf_verifier_log_member(env, struct_type, member, ++ "Member is not byte aligned"); ++ return -EINVAL; ++ } ++ ++ if (struct_size - bytes_offset < sizeof(void *)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member exceeds struct_size"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int btf_ref_type_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ if (btf_type_vlen(t)) { ++ btf_verifier_log_type(env, t, "vlen != 0"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_kflag(t)) { ++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); ++ return -EINVAL; ++ } ++ ++ if (!BTF_TYPE_ID_VALID(t->type)) { ++ btf_verifier_log_type(env, t, "Invalid type_id"); ++ return -EINVAL; ++ } ++ ++ /* typedef type must have a valid name, and other ref types, ++ * volatile, const, restrict, should have a null name. ++ */ ++ if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) { ++ if (!t->name_off || ++ !btf_name_valid_identifier(env->btf, t->name_off)) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ } else { ++ if (t->name_off) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ return 0; ++} ++ ++static int btf_modifier_resolve(struct btf_verifier_env *env, ++ const struct resolve_vertex *v) ++{ ++ const struct btf_type *t = v->t; ++ const struct btf_type *next_type; ++ u32 next_type_id = t->type; ++ struct btf *btf = env->btf; ++ ++ next_type = btf_type_by_id(btf, next_type_id); ++ if (!next_type || btf_type_is_resolve_source_only(next_type)) { ++ btf_verifier_log_type(env, v->t, "Invalid type_id"); ++ return -EINVAL; ++ } ++ ++ if (!env_type_is_resolve_sink(env, next_type) && ++ !env_type_is_resolved(env, next_type_id)) ++ return env_stack_push(env, next_type, next_type_id); ++ ++ /* Figure out the resolved next_type_id with size. ++ * They will be stored in the current modifier's ++ * resolved_ids and resolved_sizes such that it can ++ * save us a few type-following when we use it later (e.g. in ++ * pretty print). 
++ */ ++ if (!btf_type_id_size(btf, &next_type_id, NULL)) { ++ if (env_type_is_resolved(env, next_type_id)) ++ next_type = btf_type_id_resolve(btf, &next_type_id); ++ ++ /* "typedef void new_void", "const void"...etc */ ++ if (!btf_type_is_void(next_type) && ++ !btf_type_is_fwd(next_type) && ++ !btf_type_is_func_proto(next_type)) { ++ btf_verifier_log_type(env, v->t, "Invalid type_id"); ++ return -EINVAL; ++ } ++ } ++ ++ env_stack_pop_resolved(env, next_type_id, 0); ++ ++ return 0; ++} ++ ++static int btf_var_resolve(struct btf_verifier_env *env, ++ const struct resolve_vertex *v) ++{ ++ const struct btf_type *next_type; ++ const struct btf_type *t = v->t; ++ u32 next_type_id = t->type; ++ struct btf *btf = env->btf; ++ ++ next_type = btf_type_by_id(btf, next_type_id); ++ if (!next_type || btf_type_is_resolve_source_only(next_type)) { ++ btf_verifier_log_type(env, v->t, "Invalid type_id"); ++ return -EINVAL; ++ } ++ ++ if (!env_type_is_resolve_sink(env, next_type) && ++ !env_type_is_resolved(env, next_type_id)) ++ return env_stack_push(env, next_type, next_type_id); ++ ++ if (btf_type_is_modifier(next_type)) { ++ const struct btf_type *resolved_type; ++ u32 resolved_type_id; ++ ++ resolved_type_id = next_type_id; ++ resolved_type = btf_type_id_resolve(btf, &resolved_type_id); ++ ++ if (btf_type_is_ptr(resolved_type) && ++ !env_type_is_resolve_sink(env, resolved_type) && ++ !env_type_is_resolved(env, resolved_type_id)) ++ return env_stack_push(env, resolved_type, ++ resolved_type_id); ++ } ++ ++ /* We must resolve to something concrete at this point, no ++ * forward types or similar that would resolve to size of ++ * zero is allowed. ++ */ ++ if (!btf_type_id_size(btf, &next_type_id, NULL)) { ++ btf_verifier_log_type(env, v->t, "Invalid type_id"); ++ return -EINVAL; ++ } ++ ++ env_stack_pop_resolved(env, next_type_id, 0); ++ ++ return 0; ++} ++ ++static int btf_ptr_resolve(struct btf_verifier_env *env, ++ const struct resolve_vertex *v) ++{ ++ const struct btf_type *next_type; ++ const struct btf_type *t = v->t; ++ u32 next_type_id = t->type; ++ struct btf *btf = env->btf; ++ ++ next_type = btf_type_by_id(btf, next_type_id); ++ if (!next_type || btf_type_is_resolve_source_only(next_type)) { ++ btf_verifier_log_type(env, v->t, "Invalid type_id"); ++ return -EINVAL; ++ } ++ ++ if (!env_type_is_resolve_sink(env, next_type) && ++ !env_type_is_resolved(env, next_type_id)) ++ return env_stack_push(env, next_type, next_type_id); ++ ++ /* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY, ++ * the modifier may have stopped resolving when it was resolved ++ * to a ptr (last-resolved-ptr). ++ * ++ * We now need to continue from the last-resolved-ptr to ++ * ensure the last-resolved-ptr will not referring back to ++ * the currenct ptr (t). 
++ */ ++ if (btf_type_is_modifier(next_type)) { ++ const struct btf_type *resolved_type; ++ u32 resolved_type_id; ++ ++ resolved_type_id = next_type_id; ++ resolved_type = btf_type_id_resolve(btf, &resolved_type_id); ++ ++ if (btf_type_is_ptr(resolved_type) && ++ !env_type_is_resolve_sink(env, resolved_type) && ++ !env_type_is_resolved(env, resolved_type_id)) ++ return env_stack_push(env, resolved_type, ++ resolved_type_id); ++ } ++ ++ if (!btf_type_id_size(btf, &next_type_id, NULL)) { ++ if (env_type_is_resolved(env, next_type_id)) ++ next_type = btf_type_id_resolve(btf, &next_type_id); ++ ++ if (!btf_type_is_void(next_type) && ++ !btf_type_is_fwd(next_type) && ++ !btf_type_is_func_proto(next_type)) { ++ btf_verifier_log_type(env, v->t, "Invalid type_id"); ++ return -EINVAL; ++ } ++ } ++ ++ env_stack_pop_resolved(env, next_type_id, 0); ++ ++ return 0; ++} ++ ++static void btf_modifier_seq_show(const struct btf *btf, ++ const struct btf_type *t, ++ u32 type_id, void *data, ++ u8 bits_offset, struct seq_file *m) ++{ ++ t = btf_type_id_resolve(btf, &type_id); ++ ++ btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); ++} ++ ++static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t, ++ u32 type_id, void *data, u8 bits_offset, ++ struct seq_file *m) ++{ ++ t = btf_type_id_resolve(btf, &type_id); ++ ++ btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); ++} ++ ++static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t, ++ u32 type_id, void *data, u8 bits_offset, ++ struct seq_file *m) ++{ ++ /* It is a hashed value */ ++ seq_printf(m, "%p", *(void **)data); ++} ++ ++static void btf_ref_type_log(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ btf_verifier_log(env, "type_id=%u", t->type); ++} ++ ++static struct btf_kind_operations modifier_ops = { ++ .check_meta = btf_ref_type_check_meta, ++ .resolve = btf_modifier_resolve, ++ .check_member = btf_modifier_check_member, ++ .check_kflag_member = btf_modifier_check_kflag_member, ++ .log_details = btf_ref_type_log, ++ .seq_show = btf_modifier_seq_show, ++}; ++ ++static struct btf_kind_operations ptr_ops = { ++ .check_meta = btf_ref_type_check_meta, ++ .resolve = btf_ptr_resolve, ++ .check_member = btf_ptr_check_member, ++ .check_kflag_member = btf_generic_check_kflag_member, ++ .log_details = btf_ref_type_log, ++ .seq_show = btf_ptr_seq_show, ++}; ++ ++static s32 btf_fwd_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ if (btf_type_vlen(t)) { ++ btf_verifier_log_type(env, t, "vlen != 0"); ++ return -EINVAL; ++ } ++ ++ if (t->type) { ++ btf_verifier_log_type(env, t, "type != 0"); ++ return -EINVAL; ++ } ++ ++ /* fwd type must have a valid name */ ++ if (!t->name_off || ++ !btf_name_valid_identifier(env->btf, t->name_off)) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ return 0; ++} ++ ++static void btf_fwd_type_log(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ btf_verifier_log(env, "%s", btf_type_kflag(t) ? 
"union" : "struct"); ++} ++ ++static struct btf_kind_operations fwd_ops = { ++ .check_meta = btf_fwd_check_meta, ++ .resolve = btf_df_resolve, ++ .check_member = btf_df_check_member, ++ .check_kflag_member = btf_df_check_kflag_member, ++ .log_details = btf_fwd_type_log, ++ .seq_show = btf_df_seq_show, ++}; ++ ++static int btf_array_check_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ u32 struct_bits_off = member->offset; ++ u32 struct_size, bytes_offset; ++ u32 array_type_id, array_size; ++ struct btf *btf = env->btf; ++ ++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member is not byte aligned"); ++ return -EINVAL; ++ } ++ ++ array_type_id = member->type; ++ btf_type_id_size(btf, &array_type_id, &array_size); ++ struct_size = struct_type->size; ++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); ++ if (struct_size - bytes_offset < array_size) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member exceeds struct_size"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static s32 btf_array_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ const struct btf_array *array = btf_type_array(t); ++ u32 meta_needed = sizeof(*array); ++ ++ if (meta_left < meta_needed) { ++ btf_verifier_log_basic(env, t, ++ "meta_left:%u meta_needed:%u", ++ meta_left, meta_needed); ++ return -EINVAL; ++ } ++ ++ /* array type should not have a name */ ++ if (t->name_off) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_vlen(t)) { ++ btf_verifier_log_type(env, t, "vlen != 0"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_kflag(t)) { ++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); ++ return -EINVAL; ++ } ++ ++ if (t->size) { ++ btf_verifier_log_type(env, t, "size != 0"); ++ return -EINVAL; ++ } ++ ++ /* Array elem type and index type cannot be in type void, ++ * so !array->type and !array->index_type are not allowed. 
++ */ ++ if (!array->type || !BTF_TYPE_ID_VALID(array->type)) { ++ btf_verifier_log_type(env, t, "Invalid elem"); ++ return -EINVAL; ++ } ++ ++ if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) { ++ btf_verifier_log_type(env, t, "Invalid index"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ return meta_needed; ++} ++ ++static int btf_array_resolve(struct btf_verifier_env *env, ++ const struct resolve_vertex *v) ++{ ++ const struct btf_array *array = btf_type_array(v->t); ++ const struct btf_type *elem_type, *index_type; ++ u32 elem_type_id, index_type_id; ++ struct btf *btf = env->btf; ++ u32 elem_size; ++ ++ /* Check array->index_type */ ++ index_type_id = array->index_type; ++ index_type = btf_type_by_id(btf, index_type_id); ++ if (btf_type_nosize_or_null(index_type) || ++ btf_type_is_resolve_source_only(index_type)) { ++ btf_verifier_log_type(env, v->t, "Invalid index"); ++ return -EINVAL; ++ } ++ ++ if (!env_type_is_resolve_sink(env, index_type) && ++ !env_type_is_resolved(env, index_type_id)) ++ return env_stack_push(env, index_type, index_type_id); ++ ++ index_type = btf_type_id_size(btf, &index_type_id, NULL); ++ if (!index_type || !btf_type_is_int(index_type) || ++ !btf_type_int_is_regular(index_type)) { ++ btf_verifier_log_type(env, v->t, "Invalid index"); ++ return -EINVAL; ++ } ++ ++ /* Check array->type */ ++ elem_type_id = array->type; ++ elem_type = btf_type_by_id(btf, elem_type_id); ++ if (btf_type_nosize_or_null(elem_type) || ++ btf_type_is_resolve_source_only(elem_type)) { ++ btf_verifier_log_type(env, v->t, ++ "Invalid elem"); ++ return -EINVAL; ++ } ++ ++ if (!env_type_is_resolve_sink(env, elem_type) && ++ !env_type_is_resolved(env, elem_type_id)) ++ return env_stack_push(env, elem_type, elem_type_id); ++ ++ elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); ++ if (!elem_type) { ++ btf_verifier_log_type(env, v->t, "Invalid elem"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) { ++ btf_verifier_log_type(env, v->t, "Invalid array of int"); ++ return -EINVAL; ++ } ++ ++ if (array->nelems && elem_size > U32_MAX / array->nelems) { ++ btf_verifier_log_type(env, v->t, ++ "Array size overflows U32_MAX"); ++ return -EINVAL; ++ } ++ ++ env_stack_pop_resolved(env, elem_type_id, elem_size * array->nelems); ++ ++ return 0; ++} ++ ++static void btf_array_log(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ const struct btf_array *array = btf_type_array(t); ++ ++ btf_verifier_log(env, "type_id=%u index_type_id=%u nr_elems=%u", ++ array->type, array->index_type, array->nelems); ++} ++ ++static void btf_array_seq_show(const struct btf *btf, const struct btf_type *t, ++ u32 type_id, void *data, u8 bits_offset, ++ struct seq_file *m) ++{ ++ const struct btf_array *array = btf_type_array(t); ++ const struct btf_kind_operations *elem_ops; ++ const struct btf_type *elem_type; ++ u32 i, elem_size, elem_type_id; ++ ++ elem_type_id = array->type; ++ elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); ++ elem_ops = btf_type_ops(elem_type); ++ seq_puts(m, "["); ++ for (i = 0; i < array->nelems; i++) { ++ if (i) ++ seq_puts(m, ","); ++ ++ elem_ops->seq_show(btf, elem_type, elem_type_id, data, ++ bits_offset, m); ++ data += elem_size; ++ } ++ seq_puts(m, "]"); ++} ++ ++static struct btf_kind_operations array_ops = { ++ .check_meta = btf_array_check_meta, ++ .resolve = btf_array_resolve, ++ .check_member = btf_array_check_member, ++ .check_kflag_member = 
btf_generic_check_kflag_member, ++ .log_details = btf_array_log, ++ .seq_show = btf_array_seq_show, ++}; ++ ++static int btf_struct_check_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ u32 struct_bits_off = member->offset; ++ u32 struct_size, bytes_offset; ++ ++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member is not byte aligned"); ++ return -EINVAL; ++ } ++ ++ struct_size = struct_type->size; ++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); ++ if (struct_size - bytes_offset < member_type->size) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member exceeds struct_size"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static s32 btf_struct_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION; ++ const struct btf_member *member; ++ u32 meta_needed, last_offset; ++ struct btf *btf = env->btf; ++ u32 struct_size = t->size; ++ u32 offset; ++ u16 i; ++ ++ meta_needed = btf_type_vlen(t) * sizeof(*member); ++ if (meta_left < meta_needed) { ++ btf_verifier_log_basic(env, t, ++ "meta_left:%u meta_needed:%u", ++ meta_left, meta_needed); ++ return -EINVAL; ++ } ++ ++ /* struct type either no name or a valid one */ ++ if (t->name_off && ++ !btf_name_valid_identifier(env->btf, t->name_off)) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ last_offset = 0; ++ for_each_member(i, t, member) { ++ if (!btf_name_offset_valid(btf, member->name_off)) { ++ btf_verifier_log_member(env, t, member, ++ "Invalid member name_offset:%u", ++ member->name_off); ++ return -EINVAL; ++ } ++ ++ /* struct member either no name or a valid one */ ++ if (member->name_off && ++ !btf_name_valid_identifier(btf, member->name_off)) { ++ btf_verifier_log_member(env, t, member, "Invalid name"); ++ return -EINVAL; ++ } ++ /* A member cannot be in type void */ ++ if (!member->type || !BTF_TYPE_ID_VALID(member->type)) { ++ btf_verifier_log_member(env, t, member, ++ "Invalid type_id"); ++ return -EINVAL; ++ } ++ ++ offset = btf_member_bit_offset(t, member); ++ if (is_union && offset) { ++ btf_verifier_log_member(env, t, member, ++ "Invalid member bits_offset"); ++ return -EINVAL; ++ } ++ ++ /* ++ * ">" instead of ">=" because the last member could be ++ * "char a[0];" ++ */ ++ if (last_offset > offset) { ++ btf_verifier_log_member(env, t, member, ++ "Invalid member bits_offset"); ++ return -EINVAL; ++ } ++ ++ if (BITS_ROUNDUP_BYTES(offset) > struct_size) { ++ btf_verifier_log_member(env, t, member, ++ "Member bits_offset exceeds its struct size"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_member(env, t, member, NULL); ++ last_offset = offset; ++ } ++ ++ return meta_needed; ++} ++ ++static int btf_struct_resolve(struct btf_verifier_env *env, ++ const struct resolve_vertex *v) ++{ ++ const struct btf_member *member; ++ int err; ++ u16 i; ++ ++ /* Before continue resolving the next_member, ++ * ensure the last member is indeed resolved to a ++ * type with size info. 
++ */ ++ if (v->next_member) { ++ const struct btf_type *last_member_type; ++ const struct btf_member *last_member; ++ u16 last_member_type_id; ++ ++ last_member = btf_type_member(v->t) + v->next_member - 1; ++ last_member_type_id = last_member->type; ++ if (WARN_ON_ONCE(!env_type_is_resolved(env, ++ last_member_type_id))) ++ return -EINVAL; ++ ++ last_member_type = btf_type_by_id(env->btf, ++ last_member_type_id); ++ if (btf_type_kflag(v->t)) ++ err = btf_type_ops(last_member_type)->check_kflag_member(env, v->t, ++ last_member, ++ last_member_type); ++ else ++ err = btf_type_ops(last_member_type)->check_member(env, v->t, ++ last_member, ++ last_member_type); ++ if (err) ++ return err; ++ } ++ ++ for_each_member_from(i, v->next_member, v->t, member) { ++ u32 member_type_id = member->type; ++ const struct btf_type *member_type = btf_type_by_id(env->btf, ++ member_type_id); ++ ++ if (btf_type_nosize_or_null(member_type) || ++ btf_type_is_resolve_source_only(member_type)) { ++ btf_verifier_log_member(env, v->t, member, ++ "Invalid member"); ++ return -EINVAL; ++ } ++ ++ if (!env_type_is_resolve_sink(env, member_type) && ++ !env_type_is_resolved(env, member_type_id)) { ++ env_stack_set_next_member(env, i + 1); ++ return env_stack_push(env, member_type, member_type_id); ++ } ++ ++ if (btf_type_kflag(v->t)) ++ err = btf_type_ops(member_type)->check_kflag_member(env, v->t, ++ member, ++ member_type); ++ else ++ err = btf_type_ops(member_type)->check_member(env, v->t, ++ member, ++ member_type); ++ if (err) ++ return err; ++ } ++ ++ env_stack_pop_resolved(env, 0, 0); ++ ++ return 0; ++} ++ ++static void btf_struct_log(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); ++} ++ ++/* find 'struct bpf_spin_lock' in map value. ++ * return >= 0 offset if found ++ * and < 0 in case of error ++ */ ++int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) ++{ ++ const struct btf_member *member; ++ u32 i, off = -ENOENT; ++ ++ if (!__btf_type_is_struct(t)) ++ return -EINVAL; ++ ++ for_each_member(i, t, member) { ++ const struct btf_type *member_type = btf_type_by_id(btf, ++ member->type); ++ if (!__btf_type_is_struct(member_type)) ++ continue; ++ if (member_type->size != sizeof(struct bpf_spin_lock)) ++ continue; ++ if (strcmp(__btf_name_by_offset(btf, member_type->name_off), ++ "bpf_spin_lock")) ++ continue; ++ if (off != -ENOENT) ++ /* only one 'struct bpf_spin_lock' is allowed */ ++ return -E2BIG; ++ off = btf_member_bit_offset(t, member); ++ if (off % 8) ++ /* valid C code cannot generate such BTF */ ++ return -EINVAL; ++ off /= 8; ++ if (off % __alignof__(struct bpf_spin_lock)) ++ /* valid struct bpf_spin_lock will be 4 byte aligned */ ++ return -EINVAL; ++ } ++ return off; ++} ++ ++static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, ++ u32 type_id, void *data, u8 bits_offset, ++ struct seq_file *m) ++{ ++ const char *seq = BTF_INFO_KIND(t->info) == BTF_KIND_UNION ? 
"|" : ","; ++ const struct btf_member *member; ++ u32 i; ++ ++ seq_puts(m, "{"); ++ for_each_member(i, t, member) { ++ const struct btf_type *member_type = btf_type_by_id(btf, ++ member->type); ++ const struct btf_kind_operations *ops; ++ u32 member_offset, bitfield_size; ++ u32 bytes_offset; ++ u8 bits8_offset; ++ ++ if (i) ++ seq_puts(m, seq); ++ ++ member_offset = btf_member_bit_offset(t, member); ++ bitfield_size = btf_member_bitfield_size(t, member); ++ bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); ++ bits8_offset = BITS_PER_BYTE_MASKED(member_offset); ++ if (bitfield_size) { ++ btf_bitfield_seq_show(data + bytes_offset, bits8_offset, ++ bitfield_size, m); ++ } else { ++ ops = btf_type_ops(member_type); ++ ops->seq_show(btf, member_type, member->type, ++ data + bytes_offset, bits8_offset, m); ++ } ++ } ++ seq_puts(m, "}"); ++} ++ ++static struct btf_kind_operations struct_ops = { ++ .check_meta = btf_struct_check_meta, ++ .resolve = btf_struct_resolve, ++ .check_member = btf_struct_check_member, ++ .check_kflag_member = btf_generic_check_kflag_member, ++ .log_details = btf_struct_log, ++ .seq_show = btf_struct_seq_show, ++}; ++ ++static int btf_enum_check_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ u32 struct_bits_off = member->offset; ++ u32 struct_size, bytes_offset; ++ ++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member is not byte aligned"); ++ return -EINVAL; ++ } ++ ++ struct_size = struct_type->size; ++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); ++ if (struct_size - bytes_offset < member_type->size) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member exceeds struct_size"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int btf_enum_check_kflag_member(struct btf_verifier_env *env, ++ const struct btf_type *struct_type, ++ const struct btf_member *member, ++ const struct btf_type *member_type) ++{ ++ u32 struct_bits_off, nr_bits, bytes_end, struct_size; ++ u32 int_bitsize = sizeof(int) * BITS_PER_BYTE; ++ ++ struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset); ++ nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset); ++ if (!nr_bits) { ++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member is not byte aligned"); ++ return -EINVAL; ++ } ++ ++ nr_bits = int_bitsize; ++ } else if (nr_bits > int_bitsize) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Invalid member bitfield_size"); ++ return -EINVAL; ++ } ++ ++ struct_size = struct_type->size; ++ bytes_end = BITS_ROUNDUP_BYTES(struct_bits_off + nr_bits); ++ if (struct_size < bytes_end) { ++ btf_verifier_log_member(env, struct_type, member, ++ "Member exceeds struct_size"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static s32 btf_enum_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ const struct btf_enum *enums = btf_type_enum(t); ++ struct btf *btf = env->btf; ++ u16 i, nr_enums; ++ u32 meta_needed; ++ ++ nr_enums = btf_type_vlen(t); ++ meta_needed = nr_enums * sizeof(*enums); ++ ++ if (meta_left < meta_needed) { ++ btf_verifier_log_basic(env, t, ++ "meta_left:%u meta_needed:%u", ++ meta_left, meta_needed); ++ return -EINVAL; ++ } ++ ++ if (btf_type_kflag(t)) { ++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); ++ return -EINVAL; ++ } ++ ++ if (t->size > 8 || !is_power_of_2(t->size)) { ++ 
btf_verifier_log_type(env, t, "Unexpected size"); ++ return -EINVAL; ++ } ++ ++ /* enum type either no name or a valid one */ ++ if (t->name_off && ++ !btf_name_valid_identifier(env->btf, t->name_off)) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ for (i = 0; i < nr_enums; i++) { ++ if (!btf_name_offset_valid(btf, enums[i].name_off)) { ++ btf_verifier_log(env, "\tInvalid name_offset:%u", ++ enums[i].name_off); ++ return -EINVAL; ++ } ++ ++ /* enum member must have a valid name */ ++ if (!enums[i].name_off || ++ !btf_name_valid_identifier(btf, enums[i].name_off)) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ ++ ++ btf_verifier_log(env, "\t%s val=%d\n", ++ __btf_name_by_offset(btf, enums[i].name_off), ++ enums[i].val); ++ } ++ ++ return meta_needed; ++} ++ ++static void btf_enum_log(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); ++} ++ ++static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t, ++ u32 type_id, void *data, u8 bits_offset, ++ struct seq_file *m) ++{ ++ const struct btf_enum *enums = btf_type_enum(t); ++ u32 i, nr_enums = btf_type_vlen(t); ++ int v = *(int *)data; ++ ++ for (i = 0; i < nr_enums; i++) { ++ if (v == enums[i].val) { ++ seq_printf(m, "%s", ++ __btf_name_by_offset(btf, ++ enums[i].name_off)); ++ return; ++ } ++ } ++ ++ seq_printf(m, "%d", v); ++} ++ ++static struct btf_kind_operations enum_ops = { ++ .check_meta = btf_enum_check_meta, ++ .resolve = btf_df_resolve, ++ .check_member = btf_enum_check_member, ++ .check_kflag_member = btf_enum_check_kflag_member, ++ .log_details = btf_enum_log, ++ .seq_show = btf_enum_seq_show, ++}; ++ ++static s32 btf_func_proto_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ u32 meta_needed = btf_type_vlen(t) * sizeof(struct btf_param); ++ ++ if (meta_left < meta_needed) { ++ btf_verifier_log_basic(env, t, ++ "meta_left:%u meta_needed:%u", ++ meta_left, meta_needed); ++ return -EINVAL; ++ } ++ ++ if (t->name_off) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_kflag(t)) { ++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ return meta_needed; ++} ++ ++static void btf_func_proto_log(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ const struct btf_param *args = (const struct btf_param *)(t + 1); ++ u16 nr_args = btf_type_vlen(t), i; ++ ++ btf_verifier_log(env, "return=%u args=(", t->type); ++ if (!nr_args) { ++ btf_verifier_log(env, "void"); ++ goto done; ++ } ++ ++ if (nr_args == 1 && !args[0].type) { ++ /* Only one vararg */ ++ btf_verifier_log(env, "vararg"); ++ goto done; ++ } ++ ++ btf_verifier_log(env, "%u %s", args[0].type, ++ __btf_name_by_offset(env->btf, ++ args[0].name_off)); ++ for (i = 1; i < nr_args - 1; i++) ++ btf_verifier_log(env, ", %u %s", args[i].type, ++ __btf_name_by_offset(env->btf, ++ args[i].name_off)); ++ ++ if (nr_args > 1) { ++ const struct btf_param *last_arg = &args[nr_args - 1]; ++ ++ if (last_arg->type) ++ btf_verifier_log(env, ", %u %s", last_arg->type, ++ __btf_name_by_offset(env->btf, ++ last_arg->name_off)); ++ else ++ btf_verifier_log(env, ", vararg"); ++ } ++ ++done: ++ btf_verifier_log(env, ")"); ++} ++ ++static struct btf_kind_operations func_proto_ops = { ++ .check_meta = 
btf_func_proto_check_meta, ++ .resolve = btf_df_resolve, ++ /* ++ * BTF_KIND_FUNC_PROTO cannot be directly referred by ++ * a struct's member. ++ * ++ * It should be a funciton pointer instead. ++ * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO) ++ * ++ * Hence, there is no btf_func_check_member(). ++ */ ++ .check_member = btf_df_check_member, ++ .check_kflag_member = btf_df_check_kflag_member, ++ .log_details = btf_func_proto_log, ++ .seq_show = btf_df_seq_show, ++}; ++ ++static s32 btf_func_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ if (!t->name_off || ++ !btf_name_valid_identifier(env->btf, t->name_off)) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_vlen(t)) { ++ btf_verifier_log_type(env, t, "vlen != 0"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_kflag(t)) { ++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ return 0; ++} ++ ++static struct btf_kind_operations func_ops = { ++ .check_meta = btf_func_check_meta, ++ .resolve = btf_df_resolve, ++ .check_member = btf_df_check_member, ++ .check_kflag_member = btf_df_check_kflag_member, ++ .log_details = btf_ref_type_log, ++ .seq_show = btf_df_seq_show, ++}; ++ ++static s32 btf_var_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ const struct btf_var *var; ++ u32 meta_needed = sizeof(*var); ++ ++ if (meta_left < meta_needed) { ++ btf_verifier_log_basic(env, t, ++ "meta_left:%u meta_needed:%u", ++ meta_left, meta_needed); ++ return -EINVAL; ++ } ++ ++ if (btf_type_vlen(t)) { ++ btf_verifier_log_type(env, t, "vlen != 0"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_kflag(t)) { ++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); ++ return -EINVAL; ++ } ++ ++ if (!t->name_off || ++ !__btf_name_valid(env->btf, t->name_off, true)) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ ++ /* A var cannot be in type void */ ++ if (!t->type || !BTF_TYPE_ID_VALID(t->type)) { ++ btf_verifier_log_type(env, t, "Invalid type_id"); ++ return -EINVAL; ++ } ++ ++ var = btf_type_var(t); ++ if (var->linkage != BTF_VAR_STATIC && ++ var->linkage != BTF_VAR_GLOBAL_ALLOCATED) { ++ btf_verifier_log_type(env, t, "Linkage not supported"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ return meta_needed; ++} ++ ++static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t) ++{ ++ const struct btf_var *var = btf_type_var(t); ++ ++ btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage); ++} ++ ++static const struct btf_kind_operations var_ops = { ++ .check_meta = btf_var_check_meta, ++ .resolve = btf_var_resolve, ++ .check_member = btf_df_check_member, ++ .check_kflag_member = btf_df_check_kflag_member, ++ .log_details = btf_var_log, ++ .seq_show = btf_var_seq_show, ++}; ++ ++static s32 btf_datasec_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ const struct btf_var_secinfo *vsi; ++ u64 last_vsi_end_off = 0, sum = 0; ++ u32 i, meta_needed; ++ ++ meta_needed = btf_type_vlen(t) * sizeof(*vsi); ++ if (meta_left < meta_needed) { ++ btf_verifier_log_basic(env, t, ++ "meta_left:%u meta_needed:%u", ++ meta_left, meta_needed); ++ return -EINVAL; ++ } ++ ++ if (!btf_type_vlen(t)) { ++ btf_verifier_log_type(env, t, "vlen == 0"); ++ return -EINVAL; ++ } ++ ++ if (!t->size) { ++ 
btf_verifier_log_type(env, t, "size == 0"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_kflag(t)) { ++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); ++ return -EINVAL; ++ } ++ ++ if (!t->name_off || ++ !btf_name_valid_section(env->btf, t->name_off)) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_type(env, t, NULL); ++ ++ for_each_vsi(i, t, vsi) { ++ /* A var cannot be in type void */ ++ if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) { ++ btf_verifier_log_vsi(env, t, vsi, ++ "Invalid type_id"); ++ return -EINVAL; ++ } ++ ++ if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) { ++ btf_verifier_log_vsi(env, t, vsi, ++ "Invalid offset"); ++ return -EINVAL; ++ } ++ ++ if (!vsi->size || vsi->size > t->size) { ++ btf_verifier_log_vsi(env, t, vsi, ++ "Invalid size"); ++ return -EINVAL; ++ } ++ ++ last_vsi_end_off = vsi->offset + vsi->size; ++ if (last_vsi_end_off > t->size) { ++ btf_verifier_log_vsi(env, t, vsi, ++ "Invalid offset+size"); ++ return -EINVAL; ++ } ++ ++ btf_verifier_log_vsi(env, t, vsi, NULL); ++ sum += vsi->size; ++ } ++ ++ if (t->size < sum) { ++ btf_verifier_log_type(env, t, "Invalid btf_info size"); ++ return -EINVAL; ++ } ++ ++ return meta_needed; ++} ++ ++static int btf_datasec_resolve(struct btf_verifier_env *env, ++ const struct resolve_vertex *v) ++{ ++ const struct btf_var_secinfo *vsi; ++ struct btf *btf = env->btf; ++ u16 i; ++ ++ for_each_vsi_from(i, v->next_member, v->t, vsi) { ++ u32 var_type_id = vsi->type, type_id, type_size = 0; ++ const struct btf_type *var_type = btf_type_by_id(env->btf, ++ var_type_id); ++ if (!var_type || !btf_type_is_var(var_type)) { ++ btf_verifier_log_vsi(env, v->t, vsi, ++ "Not a VAR kind member"); ++ return -EINVAL; ++ } ++ ++ if (!env_type_is_resolve_sink(env, var_type) && ++ !env_type_is_resolved(env, var_type_id)) { ++ env_stack_set_next_member(env, i + 1); ++ return env_stack_push(env, var_type, var_type_id); ++ } ++ ++ type_id = var_type->type; ++ if (!btf_type_id_size(btf, &type_id, &type_size)) { ++ btf_verifier_log_vsi(env, v->t, vsi, "Invalid type"); ++ return -EINVAL; ++ } ++ ++ if (vsi->size < type_size) { ++ btf_verifier_log_vsi(env, v->t, vsi, "Invalid size"); ++ return -EINVAL; ++ } ++ } ++ ++ env_stack_pop_resolved(env, 0, 0); ++ return 0; ++} ++ ++static void btf_datasec_log(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); ++} ++ ++static void btf_datasec_seq_show(const struct btf *btf, ++ const struct btf_type *t, u32 type_id, ++ void *data, u8 bits_offset, ++ struct seq_file *m) ++{ ++ const struct btf_var_secinfo *vsi; ++ const struct btf_type *var; ++ u32 i; ++ ++ seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off)); ++ for_each_vsi(i, t, vsi) { ++ var = btf_type_by_id(btf, vsi->type); ++ if (i) ++ seq_puts(m, ","); ++ btf_type_ops(var)->seq_show(btf, var, vsi->type, ++ data + vsi->offset, bits_offset, m); ++ } ++ seq_puts(m, "}"); ++} ++ ++static const struct btf_kind_operations datasec_ops = { ++ .check_meta = btf_datasec_check_meta, ++ .resolve = btf_datasec_resolve, ++ .check_member = btf_df_check_member, ++ .check_kflag_member = btf_df_check_kflag_member, ++ .log_details = btf_datasec_log, ++ .seq_show = btf_datasec_seq_show, ++}; ++ ++static int btf_func_proto_check(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ const struct btf_type *ret_type; ++ const struct btf_param *args; ++ const struct btf *btf; ++ u16 
nr_args, i; ++ int err; ++ ++ btf = env->btf; ++ args = (const struct btf_param *)(t + 1); ++ nr_args = btf_type_vlen(t); ++ ++ /* Check func return type which could be "void" (t->type == 0) */ ++ if (t->type) { ++ u32 ret_type_id = t->type; ++ ++ ret_type = btf_type_by_id(btf, ret_type_id); ++ if (!ret_type) { ++ btf_verifier_log_type(env, t, "Invalid return type"); ++ return -EINVAL; ++ } ++ ++ if (btf_type_needs_resolve(ret_type) && ++ !env_type_is_resolved(env, ret_type_id)) { ++ err = btf_resolve(env, ret_type, ret_type_id); ++ if (err) ++ return err; ++ } ++ ++ /* Ensure the return type is a type that has a size */ ++ if (!btf_type_id_size(btf, &ret_type_id, NULL)) { ++ btf_verifier_log_type(env, t, "Invalid return type"); ++ return -EINVAL; ++ } ++ } ++ ++ if (!nr_args) ++ return 0; ++ ++ /* Last func arg type_id could be 0 if it is a vararg */ ++ if (!args[nr_args - 1].type) { ++ if (args[nr_args - 1].name_off) { ++ btf_verifier_log_type(env, t, "Invalid arg#%u", ++ nr_args); ++ return -EINVAL; ++ } ++ nr_args--; ++ } ++ ++ err = 0; ++ for (i = 0; i < nr_args; i++) { ++ const struct btf_type *arg_type; ++ u32 arg_type_id; ++ ++ arg_type_id = args[i].type; ++ arg_type = btf_type_by_id(btf, arg_type_id); ++ if (!arg_type) { ++ btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); ++ err = -EINVAL; ++ break; ++ } ++ ++ if (args[i].name_off && ++ (!btf_name_offset_valid(btf, args[i].name_off) || ++ !btf_name_valid_identifier(btf, args[i].name_off))) { ++ btf_verifier_log_type(env, t, ++ "Invalid arg#%u", i + 1); ++ err = -EINVAL; ++ break; ++ } ++ ++ if (btf_type_needs_resolve(arg_type) && ++ !env_type_is_resolved(env, arg_type_id)) { ++ err = btf_resolve(env, arg_type, arg_type_id); ++ if (err) ++ break; ++ } ++ ++ if (!btf_type_id_size(btf, &arg_type_id, NULL)) { ++ btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); ++ err = -EINVAL; ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++static int btf_func_check(struct btf_verifier_env *env, ++ const struct btf_type *t) ++{ ++ const struct btf_type *proto_type; ++ const struct btf_param *args; ++ const struct btf *btf; ++ u16 nr_args, i; ++ ++ btf = env->btf; ++ proto_type = btf_type_by_id(btf, t->type); ++ ++ if (!proto_type || !btf_type_is_func_proto(proto_type)) { ++ btf_verifier_log_type(env, t, "Invalid type_id"); ++ return -EINVAL; ++ } ++ ++ args = (const struct btf_param *)(proto_type + 1); ++ nr_args = btf_type_vlen(proto_type); ++ for (i = 0; i < nr_args; i++) { ++ if (!args[i].name_off && args[i].type) { ++ btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { ++ [BTF_KIND_INT] = &int_ops, ++ [BTF_KIND_PTR] = &ptr_ops, ++ [BTF_KIND_ARRAY] = &array_ops, ++ [BTF_KIND_STRUCT] = &struct_ops, ++ [BTF_KIND_UNION] = &struct_ops, ++ [BTF_KIND_ENUM] = &enum_ops, ++ [BTF_KIND_FWD] = &fwd_ops, ++ [BTF_KIND_TYPEDEF] = &modifier_ops, ++ [BTF_KIND_VOLATILE] = &modifier_ops, ++ [BTF_KIND_CONST] = &modifier_ops, ++ [BTF_KIND_RESTRICT] = &modifier_ops, ++ [BTF_KIND_FUNC] = &func_ops, ++ [BTF_KIND_FUNC_PROTO] = &func_proto_ops, ++ [BTF_KIND_VAR] = &var_ops, ++ [BTF_KIND_DATASEC] = &datasec_ops, ++}; ++ ++static s32 btf_check_meta(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 meta_left) ++{ ++ u32 saved_meta_left = meta_left; ++ s32 var_meta_size; ++ ++ if (meta_left < sizeof(*t)) { ++ btf_verifier_log(env, "[%u] meta_left:%u meta_needed:%zu", ++ env->log_type_id, meta_left, sizeof(*t)); ++ 
return -EINVAL; ++ } ++ meta_left -= sizeof(*t); ++ ++ if (t->info & ~BTF_INFO_MASK) { ++ btf_verifier_log(env, "[%u] Invalid btf_info:%x", ++ env->log_type_id, t->info); ++ return -EINVAL; ++ } ++ ++ if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX || ++ BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) { ++ btf_verifier_log(env, "[%u] Invalid kind:%u", ++ env->log_type_id, BTF_INFO_KIND(t->info)); ++ return -EINVAL; ++ } ++ ++ if (!btf_name_offset_valid(env->btf, t->name_off)) { ++ btf_verifier_log(env, "[%u] Invalid name_offset:%u", ++ env->log_type_id, t->name_off); ++ return -EINVAL; ++ } ++ ++ var_meta_size = btf_type_ops(t)->check_meta(env, t, meta_left); ++ if (var_meta_size < 0) ++ return var_meta_size; ++ ++ meta_left -= var_meta_size; ++ ++ return saved_meta_left - meta_left; ++} ++ ++static int btf_check_all_metas(struct btf_verifier_env *env) ++{ ++ struct btf *btf = env->btf; ++ struct btf_header *hdr; ++ void *cur, *end; ++ ++ hdr = &btf->hdr; ++ cur = btf->nohdr_data + hdr->type_off; ++ end = cur + hdr->type_len; ++ ++ env->log_type_id = 1; ++ while (cur < end) { ++ struct btf_type *t = cur; ++ s32 meta_size; ++ ++ meta_size = btf_check_meta(env, t, end - cur); ++ if (meta_size < 0) ++ return meta_size; ++ ++ btf_add_type(env, t); ++ cur += meta_size; ++ env->log_type_id++; ++ } ++ ++ return 0; ++} ++ ++static bool btf_resolve_valid(struct btf_verifier_env *env, ++ const struct btf_type *t, ++ u32 type_id) ++{ ++ struct btf *btf = env->btf; ++ ++ if (!env_type_is_resolved(env, type_id)) ++ return false; ++ ++ if (btf_type_is_struct(t) || btf_type_is_datasec(t)) ++ return !btf->resolved_ids[type_id] && ++ !btf->resolved_sizes[type_id]; ++ ++ if (btf_type_is_modifier(t) || btf_type_is_ptr(t) || ++ btf_type_is_var(t)) { ++ t = btf_type_id_resolve(btf, &type_id); ++ return t && ++ !btf_type_is_modifier(t) && ++ !btf_type_is_var(t) && ++ !btf_type_is_datasec(t); ++ } ++ ++ if (btf_type_is_array(t)) { ++ const struct btf_array *array = btf_type_array(t); ++ const struct btf_type *elem_type; ++ u32 elem_type_id = array->type; ++ u32 elem_size; ++ ++ elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); ++ return elem_type && !btf_type_is_modifier(elem_type) && ++ (array->nelems * elem_size == ++ btf->resolved_sizes[type_id]); ++ } ++ ++ return false; ++} ++ ++static int btf_resolve(struct btf_verifier_env *env, ++ const struct btf_type *t, u32 type_id) ++{ ++ u32 save_log_type_id = env->log_type_id; ++ const struct resolve_vertex *v; ++ int err = 0; ++ ++ env->resolve_mode = RESOLVE_TBD; ++ env_stack_push(env, t, type_id); ++ while (!err && (v = env_stack_peak(env))) { ++ env->log_type_id = v->type_id; ++ err = btf_type_ops(v->t)->resolve(env, v); ++ } ++ ++ env->log_type_id = type_id; ++ if (err == -E2BIG) { ++ btf_verifier_log_type(env, t, ++ "Exceeded max resolving depth:%u", ++ MAX_RESOLVE_DEPTH); ++ } else if (err == -EEXIST) { ++ btf_verifier_log_type(env, t, "Loop detected"); ++ } ++ ++ /* Final sanity check */ ++ if (!err && !btf_resolve_valid(env, t, type_id)) { ++ btf_verifier_log_type(env, t, "Invalid resolve state"); ++ err = -EINVAL; ++ } ++ ++ env->log_type_id = save_log_type_id; ++ return err; ++} ++ ++static int btf_check_all_types(struct btf_verifier_env *env) ++{ ++ struct btf *btf = env->btf; ++ u32 type_id; ++ int err; ++ ++ err = env_resolve_init(env); ++ if (err) ++ return err; ++ ++ env->phase++; ++ for (type_id = 1; type_id <= btf->nr_types; type_id++) { ++ const struct btf_type *t = btf_type_by_id(btf, type_id); ++ ++ env->log_type_id = type_id; ++ if 
(btf_type_needs_resolve(t) && ++ !env_type_is_resolved(env, type_id)) { ++ err = btf_resolve(env, t, type_id); ++ if (err) ++ return err; ++ } ++ ++ if (btf_type_is_func_proto(t)) { ++ err = btf_func_proto_check(env, t); ++ if (err) ++ return err; ++ } ++ ++ if (btf_type_is_func(t)) { ++ err = btf_func_check(env, t); ++ if (err) ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++static int btf_parse_type_sec(struct btf_verifier_env *env) ++{ ++ const struct btf_header *hdr = &env->btf->hdr; ++ int err; ++ ++ /* Type section must align to 4 bytes */ ++ if (hdr->type_off & (sizeof(u32) - 1)) { ++ btf_verifier_log(env, "Unaligned type_off"); ++ return -EINVAL; ++ } ++ ++ if (!hdr->type_len) { ++ btf_verifier_log(env, "No type found"); ++ return -EINVAL; ++ } ++ ++ err = btf_check_all_metas(env); ++ if (err) ++ return err; ++ ++ return btf_check_all_types(env); ++} ++ ++static int btf_parse_str_sec(struct btf_verifier_env *env) ++{ ++ const struct btf_header *hdr; ++ struct btf *btf = env->btf; ++ const char *start, *end; ++ ++ hdr = &btf->hdr; ++ start = btf->nohdr_data + hdr->str_off; ++ end = start + hdr->str_len; ++ ++ if (end != btf->data + btf->data_size) { ++ btf_verifier_log(env, "String section is not at the end"); ++ return -EINVAL; ++ } ++ ++ if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || ++ start[0] || end[-1]) { ++ btf_verifier_log(env, "Invalid string section"); ++ return -EINVAL; ++ } ++ ++ btf->strings = start; ++ ++ return 0; ++} ++ ++static const size_t btf_sec_info_offset[] = { ++ offsetof(struct btf_header, type_off), ++ offsetof(struct btf_header, str_off), ++}; ++ ++static int btf_sec_info_cmp(const void *a, const void *b) ++{ ++ const struct btf_sec_info *x = a; ++ const struct btf_sec_info *y = b; ++ ++ return (int)(x->off - y->off) ? 
: (int)(x->len - y->len); ++} ++ ++static int btf_check_sec_info(struct btf_verifier_env *env, ++ u32 btf_data_size) ++{ ++ struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)]; ++ u32 total, expected_total, i; ++ const struct btf_header *hdr; ++ const struct btf *btf; ++ ++ btf = env->btf; ++ hdr = &btf->hdr; ++ ++ /* Populate the secs from hdr */ ++ for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) ++ secs[i] = *(struct btf_sec_info *)((void *)hdr + ++ btf_sec_info_offset[i]); ++ ++ sort(secs, ARRAY_SIZE(btf_sec_info_offset), ++ sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL); ++ ++ /* Check for gaps and overlap among sections */ ++ total = 0; ++ expected_total = btf_data_size - hdr->hdr_len; ++ for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) { ++ if (expected_total < secs[i].off) { ++ btf_verifier_log(env, "Invalid section offset"); ++ return -EINVAL; ++ } ++ if (total < secs[i].off) { ++ /* gap */ ++ btf_verifier_log(env, "Unsupported section found"); ++ return -EINVAL; ++ } ++ if (total > secs[i].off) { ++ btf_verifier_log(env, "Section overlap found"); ++ return -EINVAL; ++ } ++ if (expected_total - total < secs[i].len) { ++ btf_verifier_log(env, ++ "Total section length too long"); ++ return -EINVAL; ++ } ++ total += secs[i].len; ++ } ++ ++ /* There is data other than hdr and known sections */ ++ if (expected_total != total) { ++ btf_verifier_log(env, "Unsupported section found"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int btf_parse_hdr(struct btf_verifier_env *env) ++{ ++ u32 hdr_len, hdr_copy, btf_data_size; ++ const struct btf_header *hdr; ++ struct btf *btf; ++ int err; ++ ++ btf = env->btf; ++ btf_data_size = btf->data_size; ++ ++ if (btf_data_size < ++ offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) { ++ btf_verifier_log(env, "hdr_len not found"); ++ return -EINVAL; ++ } ++ ++ hdr = btf->data; ++ hdr_len = hdr->hdr_len; ++ if (btf_data_size < hdr_len) { ++ btf_verifier_log(env, "btf_header not found"); ++ return -EINVAL; ++ } ++ ++ /* Ensure the unsupported header fields are zero */ ++ if (hdr_len > sizeof(btf->hdr)) { ++ u8 *expected_zero = btf->data + sizeof(btf->hdr); ++ u8 *end = btf->data + hdr_len; ++ ++ for (; expected_zero < end; expected_zero++) { ++ if (*expected_zero) { ++ btf_verifier_log(env, "Unsupported btf_header"); ++ return -E2BIG; ++ } ++ } ++ } ++ ++ hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr)); ++ memcpy(&btf->hdr, btf->data, hdr_copy); ++ ++ hdr = &btf->hdr; ++ ++ btf_verifier_log_hdr(env, btf_data_size); ++ ++ if (hdr->magic != BTF_MAGIC) { ++ btf_verifier_log(env, "Invalid magic"); ++ return -EINVAL; ++ } ++ ++ if (hdr->version != BTF_VERSION) { ++ btf_verifier_log(env, "Unsupported version"); ++ return -ENOTSUPP; ++ } ++ ++ if (hdr->flags) { ++ btf_verifier_log(env, "Unsupported flags"); ++ return -ENOTSUPP; ++ } ++ ++ if (btf_data_size == hdr->hdr_len) { ++ btf_verifier_log(env, "No data"); ++ return -EINVAL; ++ } ++ ++ err = btf_check_sec_info(env, btf_data_size); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, ++ u32 log_level, char __user *log_ubuf, u32 log_size) ++{ ++ struct btf_verifier_env *env = NULL; ++ struct bpf_verifier_log *log; ++ struct btf *btf = NULL; ++ u8 *data; ++ int err; ++ ++ if (btf_data_size > BTF_MAX_SIZE) ++ return ERR_PTR(-E2BIG); ++ ++ env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); ++ if (!env) ++ return ERR_PTR(-ENOMEM); ++ ++ log = &env->log; ++ if (log_level || log_ubuf || log_size) { 
++ /* user requested verbose verifier output ++ * and supplied buffer to store the verification trace ++ */ ++ log->level = log_level; ++ log->ubuf = log_ubuf; ++ log->len_total = log_size; ++ ++ /* log attributes have to be sane */ ++ if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || ++ !log->level || !log->ubuf) { ++ err = -EINVAL; ++ goto errout; ++ } ++ } ++ ++ btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); ++ if (!btf) { ++ err = -ENOMEM; ++ goto errout; ++ } ++ env->btf = btf; ++ ++ data = kmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN); ++ if (!data) { ++ err = -ENOMEM; ++ goto errout; ++ } ++ ++ btf->data = data; ++ btf->data_size = btf_data_size; ++ ++ if (copy_from_user(data, btf_data, btf_data_size)) { ++ err = -EFAULT; ++ goto errout; ++ } ++ ++ err = btf_parse_hdr(env); ++ if (err) ++ goto errout; ++ ++ btf->nohdr_data = btf->data + btf->hdr.hdr_len; ++ ++ err = btf_parse_str_sec(env); ++ if (err) ++ goto errout; ++ ++ err = btf_parse_type_sec(env); ++ if (err) ++ goto errout; ++ ++ if (log->level && bpf_verifier_log_full(log)) { ++ err = -ENOSPC; ++ goto errout; ++ } ++ ++ btf_verifier_env_free(env); ++ refcount_set(&btf->refcnt, 1); ++ return btf; ++ ++errout: ++ btf_verifier_env_free(env); ++ if (btf) ++ btf_free(btf); ++ return ERR_PTR(err); ++} ++ ++void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, ++ struct seq_file *m) ++{ ++ const struct btf_type *t = btf_type_by_id(btf, type_id); ++ ++ btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m); ++} ++ ++#ifdef CONFIG_PROC_FS ++static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp) ++{ ++ const struct btf *btf = filp->private_data; ++ ++ seq_printf(m, "btf_id:\t%u\n", btf->id); ++} ++#endif ++ ++static int btf_release(struct inode *inode, struct file *filp) ++{ ++ btf_put(filp->private_data); ++ return 0; ++} ++ ++const struct file_operations btf_fops = { ++#ifdef CONFIG_PROC_FS ++ .show_fdinfo = bpf_btf_show_fdinfo, ++#endif ++ .release = btf_release, ++}; ++ ++static int __btf_new_fd(struct btf *btf) ++{ ++ return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC); ++} ++ ++int btf_new_fd(const union bpf_attr *attr) ++{ ++ struct btf *btf; ++ int ret; ++ ++ btf = btf_parse(u64_to_user_ptr(attr->btf), ++ attr->btf_size, attr->btf_log_level, ++ u64_to_user_ptr(attr->btf_log_buf), ++ attr->btf_log_size); ++ if (IS_ERR(btf)) ++ return PTR_ERR(btf); ++ ++ ret = btf_alloc_id(btf); ++ if (ret) { ++ btf_free(btf); ++ return ret; ++ } ++ ++ /* ++ * The BTF ID is published to the userspace. ++ * All BTF free must go through call_rcu() from ++ * now on (i.e. free by calling btf_put()). 
++ */ ++ ++ ret = __btf_new_fd(btf); ++ if (ret < 0) ++ btf_put(btf); ++ ++ return ret; ++} ++ ++struct btf *btf_get_by_fd(int fd) ++{ ++ struct btf *btf; ++ struct fd f; ++ ++ f = fdget(fd); ++ ++ if (!f.file) ++ return ERR_PTR(-EBADF); ++ ++ if (f.file->f_op != &btf_fops) { ++ fdput(f); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ btf = f.file->private_data; ++ refcount_inc(&btf->refcnt); ++ fdput(f); ++ ++ return btf; ++} ++ ++int btf_get_info_by_fd(const struct btf *btf, ++ const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ struct bpf_btf_info __user *uinfo; ++ struct bpf_btf_info info; ++ u32 info_copy, btf_copy; ++ void __user *ubtf; ++ u32 uinfo_len; ++ ++ uinfo = u64_to_user_ptr(attr->info.info); ++ uinfo_len = attr->info.info_len; ++ ++ info_copy = min_t(u32, uinfo_len, sizeof(info)); ++ memset(&info, 0, sizeof(info)); ++ if (copy_from_user(&info, uinfo, info_copy)) ++ return -EFAULT; ++ ++ info.id = btf->id; ++ ubtf = u64_to_user_ptr(info.btf); ++ btf_copy = min_t(u32, btf->data_size, info.btf_size); ++ if (copy_to_user(ubtf, btf->data, btf_copy)) ++ return -EFAULT; ++ info.btf_size = btf->data_size; ++ ++ if (copy_to_user(uinfo, &info, info_copy) || ++ put_user(info_copy, &uattr->info.info_len)) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++int btf_get_fd_by_id(u32 id) ++{ ++ struct btf *btf; ++ int fd; ++ ++ rcu_read_lock(); ++ btf = idr_find(&btf_idr, id); ++ if (!btf || !refcount_inc_not_zero(&btf->refcnt)) ++ btf = ERR_PTR(-ENOENT); ++ rcu_read_unlock(); ++ ++ if (IS_ERR(btf)) ++ return PTR_ERR(btf); ++ ++ fd = __btf_new_fd(btf); ++ if (fd < 0) ++ btf_put(btf); ++ ++ return fd; ++} ++ ++u32 btf_id(const struct btf *btf) ++{ ++ return btf->id; ++} +--- /dev/null ++++ b/kernel/bpf/cgroup.c +@@ -0,0 +1,1581 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Functions to manage eBPF programs attached to cgroups ++ * ++ * Copyright (c) 2016 Daniel Mack ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "../cgroup/cgroup-internal.h" ++ ++DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); ++EXPORT_SYMBOL(cgroup_bpf_enabled_key); ++ ++void cgroup_bpf_offline(struct cgroup *cgrp) ++{ ++ cgroup_get(cgrp); ++ percpu_ref_kill(&cgrp->bpf.refcnt); ++} ++ ++/** ++ * cgroup_bpf_release() - put references of all bpf programs and ++ * release all cgroup bpf data ++ * @work: work structure embedded into the cgroup to modify ++ */ ++static void cgroup_bpf_release(struct work_struct *work) ++{ ++ struct cgroup *p, *cgrp = container_of(work, struct cgroup, ++ bpf.release_work); ++ enum bpf_cgroup_storage_type stype; ++ struct bpf_prog_array *old_array; ++ unsigned int type; ++ ++ mutex_lock(&cgroup_mutex); ++ ++ for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { ++ struct list_head *progs = &cgrp->bpf.progs[type]; ++ struct bpf_prog_list *pl, *tmp; ++ ++ list_for_each_entry_safe(pl, tmp, progs, node) { ++ list_del(&pl->node); ++ bpf_prog_put(pl->prog); ++ for_each_cgroup_storage_type(stype) { ++ bpf_cgroup_storage_unlink(pl->storage[stype]); ++ bpf_cgroup_storage_free(pl->storage[stype]); ++ } ++ kfree(pl); ++ static_branch_dec(&cgroup_bpf_enabled_key); ++ } ++ old_array = rcu_dereference_protected( ++ cgrp->bpf.effective[type], ++ lockdep_is_held(&cgroup_mutex)); ++ bpf_prog_array_free(old_array); ++ } ++ ++ mutex_unlock(&cgroup_mutex); ++ ++ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) ++ cgroup_bpf_put(p); ++ ++ percpu_ref_exit(&cgrp->bpf.refcnt); ++ cgroup_put(cgrp); 
++} ++ ++/** ++ * cgroup_bpf_release_fn() - callback used to schedule releasing ++ * of bpf cgroup data ++ * @ref: percpu ref counter structure ++ */ ++static void cgroup_bpf_release_fn(struct percpu_ref *ref) ++{ ++ struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt); ++ ++ INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release); ++ queue_work(system_wq, &cgrp->bpf.release_work); ++} ++ ++/* count number of elements in the list. ++ * it's slow but the list cannot be long ++ */ ++static u32 prog_list_length(struct list_head *head) ++{ ++ struct bpf_prog_list *pl; ++ u32 cnt = 0; ++ ++ list_for_each_entry(pl, head, node) { ++ if (!pl->prog) ++ continue; ++ cnt++; ++ } ++ return cnt; ++} ++ ++/* if parent has non-overridable prog attached, ++ * disallow attaching new programs to the descendent cgroup. ++ * if parent has overridable or multi-prog, allow attaching ++ */ ++static bool hierarchy_allows_attach(struct cgroup *cgrp, ++ enum bpf_attach_type type, ++ u32 new_flags) ++{ ++ struct cgroup *p; ++ ++ p = cgroup_parent(cgrp); ++ if (!p) ++ return true; ++ do { ++ u32 flags = p->bpf.flags[type]; ++ u32 cnt; ++ ++ if (flags & BPF_F_ALLOW_MULTI) ++ return true; ++ cnt = prog_list_length(&p->bpf.progs[type]); ++ WARN_ON_ONCE(cnt > 1); ++ if (cnt == 1) ++ return !!(flags & BPF_F_ALLOW_OVERRIDE); ++ p = cgroup_parent(p); ++ } while (p); ++ return true; ++} ++ ++/* compute a chain of effective programs for a given cgroup: ++ * start from the list of programs in this cgroup and add ++ * all parent programs. ++ * Note that parent's F_ALLOW_OVERRIDE-type program is yielding ++ * to programs in this cgroup ++ */ ++static int compute_effective_progs(struct cgroup *cgrp, ++ enum bpf_attach_type type, ++ struct bpf_prog_array **array) ++{ ++ enum bpf_cgroup_storage_type stype; ++ struct bpf_prog_array *progs; ++ struct bpf_prog_list *pl; ++ struct cgroup *p = cgrp; ++ int cnt = 0; ++ ++ /* count number of effective programs by walking parents */ ++ do { ++ if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) ++ cnt += prog_list_length(&p->bpf.progs[type]); ++ p = cgroup_parent(p); ++ } while (p); ++ ++ progs = bpf_prog_array_alloc(cnt, GFP_KERNEL); ++ if (!progs) ++ return -ENOMEM; ++ ++ /* populate the array with effective progs */ ++ cnt = 0; ++ p = cgrp; ++ do { ++ if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) ++ continue; ++ ++ list_for_each_entry(pl, &p->bpf.progs[type], node) { ++ if (!pl->prog) ++ continue; ++ ++ progs->items[cnt].prog = pl->prog; ++ for_each_cgroup_storage_type(stype) ++ progs->items[cnt].cgroup_storage[stype] = ++ pl->storage[stype]; ++ cnt++; ++ } ++ } while ((p = cgroup_parent(p))); ++ ++ *array = progs; ++ return 0; ++} ++ ++static void activate_effective_progs(struct cgroup *cgrp, ++ enum bpf_attach_type type, ++ struct bpf_prog_array *old_array) ++{ ++ rcu_swap_protected(cgrp->bpf.effective[type], old_array, ++ lockdep_is_held(&cgroup_mutex)); ++ /* free prog array after grace period, since __cgroup_bpf_run_*() ++ * might be still walking the array ++ */ ++ bpf_prog_array_free(old_array); ++} ++ ++/** ++ * cgroup_bpf_inherit() - inherit effective programs from parent ++ * @cgrp: the cgroup to modify ++ */ ++int cgroup_bpf_inherit(struct cgroup *cgrp) ++{ ++/* has to use marco instead of const int, since compiler thinks ++ * that array below is variable length ++ */ ++#define NR ARRAY_SIZE(cgrp->bpf.effective) ++ struct bpf_prog_array *arrays[NR] = {}; ++ struct cgroup *p; ++ int ret, i; ++ ++ ret = percpu_ref_init(&cgrp->bpf.refcnt, 
cgroup_bpf_release_fn, 0, ++ GFP_KERNEL); ++ if (ret) ++ return ret; ++ ++ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) ++ cgroup_bpf_get(p); ++ ++ for (i = 0; i < NR; i++) ++ INIT_LIST_HEAD(&cgrp->bpf.progs[i]); ++ ++ for (i = 0; i < NR; i++) ++ if (compute_effective_progs(cgrp, i, &arrays[i])) ++ goto cleanup; ++ ++ for (i = 0; i < NR; i++) ++ activate_effective_progs(cgrp, i, arrays[i]); ++ ++ return 0; ++cleanup: ++ for (i = 0; i < NR; i++) ++ bpf_prog_array_free(arrays[i]); ++ ++ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) ++ cgroup_bpf_put(p); ++ ++ percpu_ref_exit(&cgrp->bpf.refcnt); ++ ++ return -ENOMEM; ++} ++ ++static int update_effective_progs(struct cgroup *cgrp, ++ enum bpf_attach_type type) ++{ ++ struct cgroup_subsys_state *css; ++ int err; ++ ++ /* allocate and recompute effective prog arrays */ ++ css_for_each_descendant_pre(css, &cgrp->self) { ++ struct cgroup *desc = container_of(css, struct cgroup, self); ++ ++ if (percpu_ref_is_zero(&desc->bpf.refcnt)) ++ continue; ++ ++ err = compute_effective_progs(desc, type, &desc->bpf.inactive); ++ if (err) ++ goto cleanup; ++ } ++ ++ /* all allocations were successful. Activate all prog arrays */ ++ css_for_each_descendant_pre(css, &cgrp->self) { ++ struct cgroup *desc = container_of(css, struct cgroup, self); ++ ++ if (percpu_ref_is_zero(&desc->bpf.refcnt)) { ++ if (unlikely(desc->bpf.inactive)) { ++ bpf_prog_array_free(desc->bpf.inactive); ++ desc->bpf.inactive = NULL; ++ } ++ continue; ++ } ++ ++ activate_effective_progs(desc, type, desc->bpf.inactive); ++ desc->bpf.inactive = NULL; ++ } ++ ++ return 0; ++ ++cleanup: ++ /* oom while computing effective. Free all computed effective arrays ++ * since they were not activated ++ */ ++ css_for_each_descendant_pre(css, &cgrp->self) { ++ struct cgroup *desc = container_of(css, struct cgroup, self); ++ ++ bpf_prog_array_free(desc->bpf.inactive); ++ desc->bpf.inactive = NULL; ++ } ++ ++ return err; ++} ++ ++#define BPF_CGROUP_MAX_PROGS 64 ++ ++/** ++ * __cgroup_bpf_attach() - Attach the program to a cgroup, and ++ * propagate the change to descendants ++ * @cgrp: The cgroup which descendants to traverse ++ * @prog: A program to attach ++ * @type: Type of attach operation ++ * @flags: Option flags ++ * ++ * Must be called with cgroup_mutex held. ++ */ ++int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, ++ enum bpf_attach_type type, u32 flags) ++{ ++ struct list_head *progs = &cgrp->bpf.progs[type]; ++ struct bpf_prog *old_prog = NULL; ++ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; ++ struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; ++ enum bpf_cgroup_storage_type stype; ++ struct bpf_prog_list *pl; ++ bool pl_was_allocated; ++ int err; ++ ++ if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ++ /* invalid combination */ ++ return -EINVAL; ++ ++ if (!hierarchy_allows_attach(cgrp, type, flags)) ++ return -EPERM; ++ ++ if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) ++ /* Disallow attaching non-overridable on top ++ * of existing overridable in this cgroup. 
++ * Disallow attaching multi-prog if overridable or none ++ */ ++ return -EPERM; ++ ++ if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) ++ return -E2BIG; ++ ++ for_each_cgroup_storage_type(stype) { ++ storage[stype] = bpf_cgroup_storage_alloc(prog, stype); ++ if (IS_ERR(storage[stype])) { ++ storage[stype] = NULL; ++ for_each_cgroup_storage_type(stype) ++ bpf_cgroup_storage_free(storage[stype]); ++ return -ENOMEM; ++ } ++ } ++ ++ if (flags & BPF_F_ALLOW_MULTI) { ++ list_for_each_entry(pl, progs, node) { ++ if (pl->prog == prog) { ++ /* disallow attaching the same prog twice */ ++ for_each_cgroup_storage_type(stype) ++ bpf_cgroup_storage_free(storage[stype]); ++ return -EINVAL; ++ } ++ } ++ ++ pl = kmalloc(sizeof(*pl), GFP_KERNEL); ++ if (!pl) { ++ for_each_cgroup_storage_type(stype) ++ bpf_cgroup_storage_free(storage[stype]); ++ return -ENOMEM; ++ } ++ ++ pl_was_allocated = true; ++ pl->prog = prog; ++ for_each_cgroup_storage_type(stype) ++ pl->storage[stype] = storage[stype]; ++ list_add_tail(&pl->node, progs); ++ } else { ++ if (list_empty(progs)) { ++ pl = kmalloc(sizeof(*pl), GFP_KERNEL); ++ if (!pl) { ++ for_each_cgroup_storage_type(stype) ++ bpf_cgroup_storage_free(storage[stype]); ++ return -ENOMEM; ++ } ++ pl_was_allocated = true; ++ list_add_tail(&pl->node, progs); ++ } else { ++ pl = list_first_entry(progs, typeof(*pl), node); ++ old_prog = pl->prog; ++ for_each_cgroup_storage_type(stype) { ++ old_storage[stype] = pl->storage[stype]; ++ bpf_cgroup_storage_unlink(old_storage[stype]); ++ } ++ pl_was_allocated = false; ++ } ++ pl->prog = prog; ++ for_each_cgroup_storage_type(stype) ++ pl->storage[stype] = storage[stype]; ++ } ++ ++ cgrp->bpf.flags[type] = flags; ++ ++ err = update_effective_progs(cgrp, type); ++ if (err) ++ goto cleanup; ++ ++ static_branch_inc(&cgroup_bpf_enabled_key); ++ for_each_cgroup_storage_type(stype) { ++ if (!old_storage[stype]) ++ continue; ++ bpf_cgroup_storage_free(old_storage[stype]); ++ } ++ if (old_prog) { ++ bpf_prog_put(old_prog); ++ static_branch_dec(&cgroup_bpf_enabled_key); ++ } ++ for_each_cgroup_storage_type(stype) ++ bpf_cgroup_storage_link(storage[stype], cgrp, type); ++ return 0; ++ ++cleanup: ++ /* and cleanup the prog list */ ++ pl->prog = old_prog; ++ for_each_cgroup_storage_type(stype) { ++ bpf_cgroup_storage_free(pl->storage[stype]); ++ pl->storage[stype] = old_storage[stype]; ++ bpf_cgroup_storage_link(old_storage[stype], cgrp, type); ++ } ++ if (pl_was_allocated) { ++ list_del(&pl->node); ++ kfree(pl); ++ } ++ return err; ++} ++ ++/** ++ * __cgroup_bpf_detach() - Detach the program from a cgroup, and ++ * propagate the change to descendants ++ * @cgrp: The cgroup which descendants to traverse ++ * @prog: A program to detach or NULL ++ * @type: Type of detach operation ++ * ++ * Must be called with cgroup_mutex held. 
++ */ ++int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, ++ enum bpf_attach_type type) ++{ ++ struct list_head *progs = &cgrp->bpf.progs[type]; ++ enum bpf_cgroup_storage_type stype; ++ u32 flags = cgrp->bpf.flags[type]; ++ struct bpf_prog *old_prog = NULL; ++ struct bpf_prog_list *pl; ++ int err; ++ ++ if (flags & BPF_F_ALLOW_MULTI) { ++ if (!prog) ++ /* to detach MULTI prog the user has to specify valid FD ++ * of the program to be detached ++ */ ++ return -EINVAL; ++ } else { ++ if (list_empty(progs)) ++ /* report error when trying to detach and nothing is attached */ ++ return -ENOENT; ++ } ++ ++ if (flags & BPF_F_ALLOW_MULTI) { ++ /* find the prog and detach it */ ++ list_for_each_entry(pl, progs, node) { ++ if (pl->prog != prog) ++ continue; ++ old_prog = prog; ++ /* mark it deleted, so it's ignored while ++ * recomputing effective ++ */ ++ pl->prog = NULL; ++ break; ++ } ++ if (!old_prog) ++ return -ENOENT; ++ } else { ++ /* to maintain backward compatibility NONE and OVERRIDE cgroups ++ * allow detaching with invalid FD (prog==NULL) ++ */ ++ pl = list_first_entry(progs, typeof(*pl), node); ++ old_prog = pl->prog; ++ pl->prog = NULL; ++ } ++ ++ err = update_effective_progs(cgrp, type); ++ if (err) ++ goto cleanup; ++ ++ /* now can actually delete it from this cgroup list */ ++ list_del(&pl->node); ++ for_each_cgroup_storage_type(stype) { ++ bpf_cgroup_storage_unlink(pl->storage[stype]); ++ bpf_cgroup_storage_free(pl->storage[stype]); ++ } ++ kfree(pl); ++ if (list_empty(progs)) ++ /* last program was detached, reset flags to zero */ ++ cgrp->bpf.flags[type] = 0; ++ ++ bpf_prog_put(old_prog); ++ static_branch_dec(&cgroup_bpf_enabled_key); ++ return 0; ++ ++cleanup: ++ /* and restore back old_prog */ ++ pl->prog = old_prog; ++ return err; ++} ++ ++/* Must be called with cgroup_mutex held to avoid races. 
*/ ++int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); ++ enum bpf_attach_type type = attr->query.attach_type; ++ struct list_head *progs = &cgrp->bpf.progs[type]; ++ u32 flags = cgrp->bpf.flags[type]; ++ struct bpf_prog_array *effective; ++ int cnt, ret = 0, i; ++ ++ effective = rcu_dereference_protected(cgrp->bpf.effective[type], ++ lockdep_is_held(&cgroup_mutex)); ++ ++ if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) ++ cnt = bpf_prog_array_length(effective); ++ else ++ cnt = prog_list_length(progs); ++ ++ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) ++ return -EFAULT; ++ if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) ++ return -EFAULT; ++ if (attr->query.prog_cnt == 0 || !prog_ids || !cnt) ++ /* return early if user requested only program count + flags */ ++ return 0; ++ if (attr->query.prog_cnt < cnt) { ++ cnt = attr->query.prog_cnt; ++ ret = -ENOSPC; ++ } ++ ++ if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { ++ return bpf_prog_array_copy_to_user(effective, prog_ids, cnt); ++ } else { ++ struct bpf_prog_list *pl; ++ u32 id; ++ ++ i = 0; ++ list_for_each_entry(pl, progs, node) { ++ id = pl->prog->aux->id; ++ if (copy_to_user(prog_ids + i, &id, sizeof(id))) ++ return -EFAULT; ++ if (++i == cnt) ++ break; ++ } ++ } ++ return ret; ++} ++ ++int cgroup_bpf_prog_attach(const union bpf_attr *attr, ++ enum bpf_prog_type ptype, struct bpf_prog *prog) ++{ ++ struct cgroup *cgrp; ++ int ret; ++ ++ cgrp = cgroup_get_from_fd(attr->target_fd); ++ if (IS_ERR(cgrp)) ++ return PTR_ERR(cgrp); ++ ++ ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, ++ attr->attach_flags); ++ cgroup_put(cgrp); ++ return ret; ++} ++ ++int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) ++{ ++ struct bpf_prog *prog; ++ struct cgroup *cgrp; ++ int ret; ++ ++ cgrp = cgroup_get_from_fd(attr->target_fd); ++ if (IS_ERR(cgrp)) ++ return PTR_ERR(cgrp); ++ ++ prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); ++ if (IS_ERR(prog)) ++ prog = NULL; ++ ++ ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); ++ if (prog) ++ bpf_prog_put(prog); ++ ++ cgroup_put(cgrp); ++ return ret; ++} ++ ++int cgroup_bpf_prog_query(const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ struct cgroup *cgrp; ++ int ret; ++ ++ cgrp = cgroup_get_from_fd(attr->query.target_fd); ++ if (IS_ERR(cgrp)) ++ return PTR_ERR(cgrp); ++ ++ ret = cgroup_bpf_query(cgrp, attr, uattr); ++ ++ cgroup_put(cgrp); ++ return ret; ++} ++ ++/** ++ * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering ++ * @sk: The socket sending or receiving traffic ++ * @skb: The skb that is being sent or received ++ * @type: The type of program to be exectuted ++ * ++ * If no socket is passed, or the socket is not of type INET or INET6, ++ * this function does nothing and returns 0. ++ * ++ * The program type passed in via @type must be suitable for network ++ * filtering. No further check is performed to assert that. ++ * ++ * For egress packets, this function can return: ++ * NET_XMIT_SUCCESS (0) - continue with packet output ++ * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr ++ * NET_XMIT_CN (2) - continue with packet output and notify TCP ++ * to call cwr ++ * -EPERM - drop packet ++ * ++ * For ingress packets, this function will return -EPERM if any ++ * attached program was found and if it returned != 1 during execution. 
++ * Otherwise 0 is returned. ++ */ ++int __cgroup_bpf_run_filter_skb(struct sock *sk, ++ struct sk_buff *skb, ++ enum bpf_attach_type type) ++{ ++ unsigned int offset = skb->data - skb_network_header(skb); ++ struct sock *save_sk; ++ void *saved_data_end; ++ struct cgroup *cgrp; ++ int ret; ++ ++ if (!sk || !sk_fullsock(sk)) ++ return 0; ++ ++ if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) ++ return 0; ++ ++ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ++ save_sk = skb->sk; ++ skb->sk = sk; ++ __skb_push(skb, offset); ++ ++ /* compute pointers for the bpf prog */ ++ bpf_compute_and_save_data_end(skb, &saved_data_end); ++ ++ if (type == BPF_CGROUP_INET_EGRESS) { ++ ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY( ++ cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb); ++ } else { ++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, ++ __bpf_prog_run_save_cb); ++ ret = (ret == 1 ? 0 : -EPERM); ++ } ++ bpf_restore_data_end(skb, saved_data_end); ++ __skb_pull(skb, offset); ++ skb->sk = save_sk; ++ ++ return ret; ++} ++EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); ++ ++/** ++ * __cgroup_bpf_run_filter_sk() - Run a program on a sock ++ * @sk: sock structure to manipulate ++ * @type: The type of program to be exectuted ++ * ++ * socket is passed is expected to be of type INET or INET6. ++ * ++ * The program type passed in via @type must be suitable for sock ++ * filtering. No further check is performed to assert that. ++ * ++ * This function will return %-EPERM if any if an attached program was found ++ * and if it returned != 1 during execution. In all other cases, 0 is returned. ++ */ ++int __cgroup_bpf_run_filter_sk(struct sock *sk, ++ enum bpf_attach_type type) ++{ ++ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ++ int ret; ++ ++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); ++ return ret == 1 ? 0 : -EPERM; ++} ++EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); ++ ++/** ++ * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and ++ * provided by user sockaddr ++ * @sk: sock struct that will use sockaddr ++ * @uaddr: sockaddr struct provided by user ++ * @type: The type of program to be exectuted ++ * @t_ctx: Pointer to attach type specific context ++ * ++ * socket is expected to be of type INET or INET6. ++ * ++ * This function will return %-EPERM if an attached program is found and ++ * returned value != 1 during execution. In all other cases, 0 is returned. ++ */ ++int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, ++ struct sockaddr *uaddr, ++ enum bpf_attach_type type, ++ void *t_ctx) ++{ ++ struct bpf_sock_addr_kern ctx = { ++ .sk = sk, ++ .uaddr = uaddr, ++ .t_ctx = t_ctx, ++ }; ++ struct sockaddr_storage unspec; ++ struct cgroup *cgrp; ++ int ret; ++ ++ /* Check socket family since not all sockets represent network ++ * endpoint (e.g. AF_UNIX). ++ */ ++ if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) ++ return 0; ++ ++ if (!ctx.uaddr) { ++ memset(&unspec, 0, sizeof(unspec)); ++ ctx.uaddr = (struct sockaddr *)&unspec; ++ } ++ ++ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); ++ ++ return ret == 1 ? 0 : -EPERM; ++} ++EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); ++ ++/** ++ * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock ++ * @sk: socket to get cgroup from ++ * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains ++ * sk with connection information (IP addresses, etc.) 
May not contain ++ * cgroup info if it is a req sock. ++ * @type: The type of program to be exectuted ++ * ++ * socket passed is expected to be of type INET or INET6. ++ * ++ * The program type passed in via @type must be suitable for sock_ops ++ * filtering. No further check is performed to assert that. ++ * ++ * This function will return %-EPERM if any if an attached program was found ++ * and if it returned != 1 during execution. In all other cases, 0 is returned. ++ */ ++int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, ++ struct bpf_sock_ops_kern *sock_ops, ++ enum bpf_attach_type type) ++{ ++ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ++ int ret; ++ ++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, ++ BPF_PROG_RUN); ++ return ret == 1 ? 0 : -EPERM; ++} ++EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); ++ ++int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, ++ short access, enum bpf_attach_type type) ++{ ++ struct cgroup *cgrp; ++ struct bpf_cgroup_dev_ctx ctx = { ++ .access_type = (access << 16) | dev_type, ++ .major = major, ++ .minor = minor, ++ }; ++ int allow = 1; ++ ++ rcu_read_lock(); ++ cgrp = task_dfl_cgroup(current); ++ allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, ++ BPF_PROG_RUN); ++ rcu_read_unlock(); ++ ++ return !allow; ++} ++EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); ++ ++static const struct bpf_func_proto * ++cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ switch (func_id) { ++ case BPF_FUNC_map_lookup_elem: ++ return &bpf_map_lookup_elem_proto; ++ case BPF_FUNC_map_update_elem: ++ return &bpf_map_update_elem_proto; ++ case BPF_FUNC_map_delete_elem: ++ return &bpf_map_delete_elem_proto; ++ case BPF_FUNC_map_push_elem: ++ return &bpf_map_push_elem_proto; ++ case BPF_FUNC_map_pop_elem: ++ return &bpf_map_pop_elem_proto; ++ case BPF_FUNC_map_peek_elem: ++ return &bpf_map_peek_elem_proto; ++ case BPF_FUNC_get_current_uid_gid: ++ return &bpf_get_current_uid_gid_proto; ++ case BPF_FUNC_get_local_storage: ++ return &bpf_get_local_storage_proto; ++ case BPF_FUNC_get_current_cgroup_id: ++ return &bpf_get_current_cgroup_id_proto; ++ case BPF_FUNC_trace_printk: ++ if (capable(CAP_SYS_ADMIN)) ++ return bpf_get_trace_printk_proto(); ++ /* fall through */ ++ default: ++ return NULL; ++ } ++} ++ ++static const struct bpf_func_proto * ++cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ return cgroup_base_func_proto(func_id, prog); ++} ++ ++static bool cgroup_dev_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ const int size_default = sizeof(__u32); ++ ++ if (type == BPF_WRITE) ++ return false; ++ ++ if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) ++ return false; ++ /* The verifier guarantees that size > 0. 
*/ ++ if (off % size != 0) ++ return false; ++ ++ switch (off) { ++ case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type): ++ bpf_ctx_record_field_size(info, size_default); ++ if (!bpf_ctx_narrow_access_ok(off, size, size_default)) ++ return false; ++ break; ++ default: ++ if (size != size_default) ++ return false; ++ } ++ ++ return true; ++} ++ ++const struct bpf_prog_ops cg_dev_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops cg_dev_verifier_ops = { ++ .get_func_proto = cgroup_dev_func_proto, ++ .is_valid_access = cgroup_dev_is_valid_access, ++}; ++ ++/** ++ * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl ++ * ++ * @head: sysctl table header ++ * @table: sysctl table ++ * @write: sysctl is being read (= 0) or written (= 1) ++ * @buf: pointer to buffer passed by user space ++ * @pcount: value-result argument: value is size of buffer pointed to by @buf, ++ * result is size of @new_buf if program set new value, initial value ++ * otherwise ++ * @ppos: value-result argument: value is position at which read from or write ++ * to sysctl is happening, result is new position if program overrode it, ++ * initial value otherwise ++ * @new_buf: pointer to pointer to new buffer that will be allocated if program ++ * overrides new value provided by user space on sysctl write ++ * NOTE: it's caller responsibility to free *new_buf if it was set ++ * @type: type of program to be executed ++ * ++ * Program is run when sysctl is being accessed, either read or written, and ++ * can allow or deny such access. ++ * ++ * This function will return %-EPERM if an attached program is found and ++ * returned value != 1 during execution. In all other cases 0 is returned. ++ */ ++int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, ++ struct ctl_table *table, int write, ++ void __user *buf, size_t *pcount, ++ loff_t *ppos, void **new_buf, ++ enum bpf_attach_type type) ++{ ++ struct bpf_sysctl_kern ctx = { ++ .head = head, ++ .table = table, ++ .write = write, ++ .ppos = ppos, ++ .cur_val = NULL, ++ .cur_len = PAGE_SIZE, ++ .new_val = NULL, ++ .new_len = 0, ++ .new_updated = 0, ++ }; ++ struct cgroup *cgrp; ++ int ret; ++ ++ ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL); ++ if (ctx.cur_val) { ++ mm_segment_t old_fs; ++ loff_t pos = 0; ++ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ if (table->proc_handler(table, 0, (void __user *)ctx.cur_val, ++ &ctx.cur_len, &pos)) { ++ /* Let BPF program decide how to proceed. */ ++ ctx.cur_len = 0; ++ } ++ set_fs(old_fs); ++ } else { ++ /* Let BPF program decide how to proceed. */ ++ ctx.cur_len = 0; ++ } ++ ++ if (write && buf && *pcount) { ++ /* BPF program should be able to override new value with a ++ * buffer bigger than provided by user. ++ */ ++ ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL); ++ ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount); ++ if (!ctx.new_val || ++ copy_from_user(ctx.new_val, buf, ctx.new_len)) ++ /* Let BPF program decide how to proceed. */ ++ ctx.new_len = 0; ++ } ++ ++ rcu_read_lock(); ++ cgrp = task_dfl_cgroup(current); ++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); ++ rcu_read_unlock(); ++ ++ kfree(ctx.cur_val); ++ ++ if (ret == 1 && ctx.new_updated) { ++ *new_buf = ctx.new_val; ++ *pcount = ctx.new_len; ++ } else { ++ kfree(ctx.new_val); ++ } ++ ++ return ret == 1 ? 
0 : -EPERM; ++} ++EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); ++ ++#ifdef CONFIG_NET ++static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, ++ enum bpf_attach_type attach_type) ++{ ++ struct bpf_prog_array *prog_array; ++ bool empty; ++ ++ rcu_read_lock(); ++ prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]); ++ empty = bpf_prog_array_is_empty(prog_array); ++ rcu_read_unlock(); ++ ++ return empty; ++} ++ ++static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen) ++{ ++ if (unlikely(max_optlen < 0)) ++ return -EINVAL; ++ ++ if (unlikely(max_optlen > PAGE_SIZE)) { ++ /* We don't expose optvals that are greater than PAGE_SIZE ++ * to the BPF program. ++ */ ++ max_optlen = PAGE_SIZE; ++ } ++ ++ ctx->optval = kzalloc(max_optlen, GFP_USER); ++ if (!ctx->optval) ++ return -ENOMEM; ++ ++ ctx->optval_end = ctx->optval + max_optlen; ++ ++ return max_optlen; ++} ++ ++static void sockopt_free_buf(struct bpf_sockopt_kern *ctx) ++{ ++ kfree(ctx->optval); ++} ++ ++int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, ++ int *optname, char __user *optval, ++ int *optlen, char **kernel_optval) ++{ ++ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ++ struct bpf_sockopt_kern ctx = { ++ .sk = sk, ++ .level = *level, ++ .optname = *optname, ++ }; ++ int ret, max_optlen; ++ ++ /* Opportunistic check to see whether we have any BPF program ++ * attached to the hook so we don't waste time allocating ++ * memory and locking the socket. ++ */ ++ if (!cgroup_bpf_enabled || ++ __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT)) ++ return 0; ++ ++ /* Allocate a bit more than the initial user buffer for ++ * BPF program. The canonical use case is overriding ++ * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic). ++ */ ++ max_optlen = max_t(int, 16, *optlen); ++ ++ max_optlen = sockopt_alloc_buf(&ctx, max_optlen); ++ if (max_optlen < 0) ++ return max_optlen; ++ ++ ctx.optlen = *optlen; ++ ++ if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ lock_sock(sk); ++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT], ++ &ctx, BPF_PROG_RUN); ++ release_sock(sk); ++ ++ if (!ret) { ++ ret = -EPERM; ++ goto out; ++ } ++ ++ if (ctx.optlen == -1) { ++ /* optlen set to -1, bypass kernel */ ++ ret = 1; ++ } else if (ctx.optlen > max_optlen || ctx.optlen < -1) { ++ /* optlen is out of bounds */ ++ ret = -EFAULT; ++ } else { ++ /* optlen within bounds, run kernel handler */ ++ ret = 0; ++ ++ /* export any potential modifications */ ++ *level = ctx.level; ++ *optname = ctx.optname; ++ ++ /* optlen == 0 from BPF indicates that we should ++ * use original userspace data. ++ */ ++ if (ctx.optlen != 0) { ++ *optlen = ctx.optlen; ++ *kernel_optval = ctx.optval; ++ /* export and don't free sockopt buf */ ++ return 0; ++ } ++ } ++ ++out: ++ sockopt_free_buf(&ctx); ++ return ret; ++} ++EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt); ++ ++int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ++ int optname, char __user *optval, ++ int __user *optlen, int max_optlen, ++ int retval) ++{ ++ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ++ struct bpf_sockopt_kern ctx = { ++ .sk = sk, ++ .level = level, ++ .optname = optname, ++ .retval = retval, ++ }; ++ int ret; ++ ++ /* Opportunistic check to see whether we have any BPF program ++ * attached to the hook so we don't waste time allocating ++ * memory and locking the socket. 
++ */ ++ if (!cgroup_bpf_enabled || ++ __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT)) ++ return retval; ++ ++ ctx.optlen = max_optlen; ++ ++ max_optlen = sockopt_alloc_buf(&ctx, max_optlen); ++ if (max_optlen < 0) ++ return max_optlen; ++ ++ if (!retval) { ++ /* If kernel getsockopt finished successfully, ++ * copy whatever was returned to the user back ++ * into our temporary buffer. Set optlen to the ++ * one that kernel returned as well to let ++ * BPF programs inspect the value. ++ */ ++ ++ if (get_user(ctx.optlen, optlen)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ if (ctx.optlen < 0) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ if (copy_from_user(ctx.optval, optval, ++ min(ctx.optlen, max_optlen)) != 0) { ++ ret = -EFAULT; ++ goto out; ++ } ++ } ++ ++ lock_sock(sk); ++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT], ++ &ctx, BPF_PROG_RUN); ++ release_sock(sk); ++ ++ if (!ret) { ++ ret = -EPERM; ++ goto out; ++ } ++ ++ if (ctx.optlen > max_optlen || ctx.optlen < 0) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ /* BPF programs only allowed to set retval to 0, not some ++ * arbitrary value. ++ */ ++ if (ctx.retval != 0 && ctx.retval != retval) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ if (ctx.optlen != 0) { ++ if (copy_to_user(optval, ctx.optval, ctx.optlen) || ++ put_user(ctx.optlen, optlen)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ } ++ ++ ret = ctx.retval; ++ ++out: ++ sockopt_free_buf(&ctx); ++ return ret; ++} ++EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt); ++#endif ++ ++static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, ++ size_t *lenp) ++{ ++ ssize_t tmp_ret = 0, ret; ++ ++ if (dir->header.parent) { ++ tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp); ++ if (tmp_ret < 0) ++ return tmp_ret; ++ } ++ ++ ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp); ++ if (ret < 0) ++ return ret; ++ *bufp += ret; ++ *lenp -= ret; ++ ret += tmp_ret; ++ ++ /* Avoid leading slash. */ ++ if (!ret) ++ return ret; ++ ++ tmp_ret = strscpy(*bufp, "/", *lenp); ++ if (tmp_ret < 0) ++ return tmp_ret; ++ *bufp += tmp_ret; ++ *lenp -= tmp_ret; ++ ++ return ret + tmp_ret; ++} ++ ++BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf, ++ size_t, buf_len, u64, flags) ++{ ++ ssize_t tmp_ret = 0, ret; ++ ++ if (!buf) ++ return -EINVAL; ++ ++ if (!(flags & BPF_F_SYSCTL_BASE_NAME)) { ++ if (!ctx->head) ++ return -EINVAL; ++ tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len); ++ if (tmp_ret < 0) ++ return tmp_ret; ++ } ++ ++ ret = strscpy(buf, ctx->table->procname, buf_len); ++ ++ return ret < 0 ? 
ret : tmp_ret + ret; ++} ++ ++static const struct bpf_func_proto bpf_sysctl_get_name_proto = { ++ .func = bpf_sysctl_get_name, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_PTR_TO_MEM, ++ .arg3_type = ARG_CONST_SIZE, ++ .arg4_type = ARG_ANYTHING, ++}; ++ ++static int copy_sysctl_value(char *dst, size_t dst_len, char *src, ++ size_t src_len) ++{ ++ if (!dst) ++ return -EINVAL; ++ ++ if (!dst_len) ++ return -E2BIG; ++ ++ if (!src || !src_len) { ++ memset(dst, 0, dst_len); ++ return -EINVAL; ++ } ++ ++ memcpy(dst, src, min(dst_len, src_len)); ++ ++ if (dst_len > src_len) { ++ memset(dst + src_len, '\0', dst_len - src_len); ++ return src_len; ++ } ++ ++ dst[dst_len - 1] = '\0'; ++ ++ return -E2BIG; ++} ++ ++BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx, ++ char *, buf, size_t, buf_len) ++{ ++ return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len); ++} ++ ++static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = { ++ .func = bpf_sysctl_get_current_value, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_PTR_TO_UNINIT_MEM, ++ .arg3_type = ARG_CONST_SIZE, ++}; ++ ++BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf, ++ size_t, buf_len) ++{ ++ if (!ctx->write) { ++ if (buf && buf_len) ++ memset(buf, '\0', buf_len); ++ return -EINVAL; ++ } ++ return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len); ++} ++ ++static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = { ++ .func = bpf_sysctl_get_new_value, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_PTR_TO_UNINIT_MEM, ++ .arg3_type = ARG_CONST_SIZE, ++}; ++ ++BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx, ++ const char *, buf, size_t, buf_len) ++{ ++ if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len) ++ return -EINVAL; ++ ++ if (buf_len > PAGE_SIZE - 1) ++ return -E2BIG; ++ ++ memcpy(ctx->new_val, buf, buf_len); ++ ctx->new_len = buf_len; ++ ctx->new_updated = 1; ++ ++ return 0; ++} ++ ++static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = { ++ .func = bpf_sysctl_set_new_value, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_PTR_TO_MEM, ++ .arg3_type = ARG_CONST_SIZE, ++}; ++ ++static const struct bpf_func_proto * ++sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ switch (func_id) { ++ case BPF_FUNC_strtol: ++ return &bpf_strtol_proto; ++ case BPF_FUNC_strtoul: ++ return &bpf_strtoul_proto; ++ case BPF_FUNC_sysctl_get_name: ++ return &bpf_sysctl_get_name_proto; ++ case BPF_FUNC_sysctl_get_current_value: ++ return &bpf_sysctl_get_current_value_proto; ++ case BPF_FUNC_sysctl_get_new_value: ++ return &bpf_sysctl_get_new_value_proto; ++ case BPF_FUNC_sysctl_set_new_value: ++ return &bpf_sysctl_set_new_value_proto; ++ default: ++ return cgroup_base_func_proto(func_id, prog); ++ } ++} ++ ++static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ const int size_default = sizeof(__u32); ++ ++ if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size) ++ return false; ++ ++ switch (off) { ++ case bpf_ctx_range(struct bpf_sysctl, write): ++ if (type != BPF_READ) ++ return false; ++ bpf_ctx_record_field_size(info, size_default); ++ return 
bpf_ctx_narrow_access_ok(off, size, size_default); ++ case bpf_ctx_range(struct bpf_sysctl, file_pos): ++ if (type == BPF_READ) { ++ bpf_ctx_record_field_size(info, size_default); ++ return bpf_ctx_narrow_access_ok(off, size, size_default); ++ } else { ++ return size == size_default; ++ } ++ default: ++ return false; ++ } ++} ++ ++static u32 sysctl_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, u32 *target_size) ++{ ++ struct bpf_insn *insn = insn_buf; ++ u32 read_size; ++ ++ switch (si->off) { ++ case offsetof(struct bpf_sysctl, write): ++ *insn++ = BPF_LDX_MEM( ++ BPF_SIZE(si->code), si->dst_reg, si->src_reg, ++ bpf_target_off(struct bpf_sysctl_kern, write, ++ FIELD_SIZEOF(struct bpf_sysctl_kern, ++ write), ++ target_size)); ++ break; ++ case offsetof(struct bpf_sysctl, file_pos): ++ /* ppos is a pointer so it should be accessed via indirect ++ * loads and stores. Also for stores additional temporary ++ * register is used since neither src_reg nor dst_reg can be ++ * overridden. ++ */ ++ if (type == BPF_WRITE) { ++ int treg = BPF_REG_9; ++ ++ if (si->src_reg == treg || si->dst_reg == treg) ++ --treg; ++ if (si->src_reg == treg || si->dst_reg == treg) ++ --treg; ++ *insn++ = BPF_STX_MEM( ++ BPF_DW, si->dst_reg, treg, ++ offsetof(struct bpf_sysctl_kern, tmp_reg)); ++ *insn++ = BPF_LDX_MEM( ++ BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), ++ treg, si->dst_reg, ++ offsetof(struct bpf_sysctl_kern, ppos)); ++ *insn++ = BPF_STX_MEM( ++ BPF_SIZEOF(u32), treg, si->src_reg, ++ bpf_ctx_narrow_access_offset( ++ 0, sizeof(u32), sizeof(loff_t))); ++ *insn++ = BPF_LDX_MEM( ++ BPF_DW, treg, si->dst_reg, ++ offsetof(struct bpf_sysctl_kern, tmp_reg)); ++ } else { ++ *insn++ = BPF_LDX_MEM( ++ BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), ++ si->dst_reg, si->src_reg, ++ offsetof(struct bpf_sysctl_kern, ppos)); ++ read_size = bpf_size_to_bytes(BPF_SIZE(si->code)); ++ *insn++ = BPF_LDX_MEM( ++ BPF_SIZE(si->code), si->dst_reg, si->dst_reg, ++ bpf_ctx_narrow_access_offset( ++ 0, read_size, sizeof(loff_t))); ++ } ++ *target_size = sizeof(u32); ++ break; ++ } ++ ++ return insn - insn_buf; ++} ++ ++const struct bpf_verifier_ops cg_sysctl_verifier_ops = { ++ .get_func_proto = sysctl_func_proto, ++ .is_valid_access = sysctl_is_valid_access, ++ .convert_ctx_access = sysctl_convert_ctx_access, ++}; ++ ++const struct bpf_prog_ops cg_sysctl_prog_ops = { ++}; ++ ++static const struct bpf_func_proto * ++cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ switch (func_id) { ++#ifdef CONFIG_NET ++ case BPF_FUNC_sk_storage_get: ++ return &bpf_sk_storage_get_proto; ++ case BPF_FUNC_sk_storage_delete: ++ return &bpf_sk_storage_delete_proto; ++#endif ++#ifdef CONFIG_INET ++ case BPF_FUNC_tcp_sock: ++ return &bpf_tcp_sock_proto; ++#endif ++ default: ++ return cgroup_base_func_proto(func_id, prog); ++ } ++} ++ ++static bool cg_sockopt_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ const int size_default = sizeof(__u32); ++ ++ if (off < 0 || off >= sizeof(struct bpf_sockopt)) ++ return false; ++ ++ if (off % size != 0) ++ return false; ++ ++ if (type == BPF_WRITE) { ++ switch (off) { ++ case offsetof(struct bpf_sockopt, retval): ++ if (size != size_default) ++ return false; ++ return prog->expected_attach_type == ++ BPF_CGROUP_GETSOCKOPT; ++ case offsetof(struct bpf_sockopt, optname): ++ /* fallthrough */ ++ case 
offsetof(struct bpf_sockopt, level): ++ if (size != size_default) ++ return false; ++ return prog->expected_attach_type == ++ BPF_CGROUP_SETSOCKOPT; ++ case offsetof(struct bpf_sockopt, optlen): ++ return size == size_default; ++ default: ++ return false; ++ } ++ } ++ ++ switch (off) { ++ case offsetof(struct bpf_sockopt, sk): ++ if (size != sizeof(__u64)) ++ return false; ++ info->reg_type = PTR_TO_SOCKET; ++ break; ++ case offsetof(struct bpf_sockopt, optval): ++ if (size != sizeof(__u64)) ++ return false; ++ info->reg_type = PTR_TO_PACKET; ++ break; ++ case offsetof(struct bpf_sockopt, optval_end): ++ if (size != sizeof(__u64)) ++ return false; ++ info->reg_type = PTR_TO_PACKET_END; ++ break; ++ case offsetof(struct bpf_sockopt, retval): ++ if (size != size_default) ++ return false; ++ return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT; ++ default: ++ if (size != size_default) ++ return false; ++ break; ++ } ++ return true; ++} ++ ++#define CG_SOCKOPT_ACCESS_FIELD(T, F) \ ++ T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \ ++ si->dst_reg, si->src_reg, \ ++ offsetof(struct bpf_sockopt_kern, F)) ++ ++static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, ++ u32 *target_size) ++{ ++ struct bpf_insn *insn = insn_buf; ++ ++ switch (si->off) { ++ case offsetof(struct bpf_sockopt, sk): ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk); ++ break; ++ case offsetof(struct bpf_sockopt, level): ++ if (type == BPF_WRITE) ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level); ++ else ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level); ++ break; ++ case offsetof(struct bpf_sockopt, optname): ++ if (type == BPF_WRITE) ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname); ++ else ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname); ++ break; ++ case offsetof(struct bpf_sockopt, optlen): ++ if (type == BPF_WRITE) ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen); ++ else ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); ++ break; ++ case offsetof(struct bpf_sockopt, retval): ++ if (type == BPF_WRITE) ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval); ++ else ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval); ++ break; ++ case offsetof(struct bpf_sockopt, optval): ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); ++ break; ++ case offsetof(struct bpf_sockopt, optval_end): ++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end); ++ break; ++ } ++ ++ return insn - insn_buf; ++} ++ ++static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf, ++ bool direct_write, ++ const struct bpf_prog *prog) ++{ ++ /* Nothing to do for sockopt argument. The data is kzalloc'ated. 
++ */ ++ return 0; ++} ++ ++const struct bpf_verifier_ops cg_sockopt_verifier_ops = { ++ .get_func_proto = cg_sockopt_func_proto, ++ .is_valid_access = cg_sockopt_is_valid_access, ++ .convert_ctx_access = cg_sockopt_convert_ctx_access, ++ .gen_prologue = cg_sockopt_get_prologue, ++}; ++ ++const struct bpf_prog_ops cg_sockopt_prog_ops = { ++}; +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -1,3 +1,4 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later + /* + * Linux Socket Filter - Kernel level socket filtering + * +@@ -12,21 +13,22 @@ + * Alexei Starovoitov + * Daniel Borkmann + * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. +- * + * Andi Kleen - Fix a few bad bugs and races. + * Kris Katterjohn - Added many additional checks in bpf_check_classic() + */ + ++#include + #include + #include + #include + #include + #include + #include ++#include ++#include ++#include ++#include ++#include + + #include + +@@ -47,6 +49,7 @@ + #define DST regs[insn->dst_reg] + #define SRC regs[insn->src_reg] + #define FP regs[BPF_REG_FP] ++#define AX regs[BPF_REG_AX] + #define ARG1 regs[BPF_REG_ARG1] + #define CTX regs[BPF_REG_CTX] + #define IMM insn->imm +@@ -70,10 +73,9 @@ void *bpf_internal_load_pointer_neg_help + return NULL; + } + +-struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) ++struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags) + { +- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | +- gfp_extra_flags; ++ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; + struct bpf_prog_aux *aux; + struct bpf_prog *fp; + +@@ -82,8 +84,6 @@ struct bpf_prog *bpf_prog_alloc(unsigned + if (fp == NULL) + return NULL; + +- kmemcheck_annotate_bitfield(fp, meta); +- + aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags); + if (aux == NULL) { + vfree(fp); +@@ -93,30 +93,151 @@ struct bpf_prog *bpf_prog_alloc(unsigned + fp->pages = size / PAGE_SIZE; + fp->aux = aux; + fp->aux->prog = fp; ++ fp->jit_requested = ebpf_jit_enabled(); ++ ++ INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode); + + return fp; + } ++ ++struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) ++{ ++ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; ++ struct bpf_prog *prog; ++ int cpu; ++ ++ prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags); ++ if (!prog) ++ return NULL; ++ ++ prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags); ++ if (!prog->aux->stats) { ++ kfree(prog->aux); ++ vfree(prog); ++ return NULL; ++ } ++ ++ for_each_possible_cpu(cpu) { ++ struct bpf_prog_stats *pstats; ++ ++ pstats = per_cpu_ptr(prog->aux->stats, cpu); ++ u64_stats_init(&pstats->syncp); ++ } ++ return prog; ++} + EXPORT_SYMBOL_GPL(bpf_prog_alloc); + ++int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) ++{ ++ if (!prog->aux->nr_linfo || !prog->jit_requested) ++ return 0; ++ ++ prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo, ++ sizeof(*prog->aux->jited_linfo), ++ GFP_KERNEL | __GFP_NOWARN); ++ if (!prog->aux->jited_linfo) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++void bpf_prog_free_jited_linfo(struct bpf_prog *prog) ++{ ++ kfree(prog->aux->jited_linfo); ++ prog->aux->jited_linfo = NULL; ++} ++ ++void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog) ++{ ++ if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0]) ++ 
bpf_prog_free_jited_linfo(prog); ++} ++ ++/* The jit engine is responsible to provide an array ++ * for insn_off to the jited_off mapping (insn_to_jit_off). ++ * ++ * The idx to this array is the insn_off. Hence, the insn_off ++ * here is relative to the prog itself instead of the main prog. ++ * This array has one entry for each xlated bpf insn. ++ * ++ * jited_off is the byte off to the last byte of the jited insn. ++ * ++ * Hence, with ++ * insn_start: ++ * The first bpf insn off of the prog. The insn off ++ * here is relative to the main prog. ++ * e.g. if prog is a subprog, insn_start > 0 ++ * linfo_idx: ++ * The prog's idx to prog->aux->linfo and jited_linfo ++ * ++ * jited_linfo[linfo_idx] = prog->bpf_func ++ * ++ * For i > linfo_idx, ++ * ++ * jited_linfo[i] = prog->bpf_func + ++ * insn_to_jit_off[linfo[i].insn_off - insn_start - 1] ++ */ ++void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, ++ const u32 *insn_to_jit_off) ++{ ++ u32 linfo_idx, insn_start, insn_end, nr_linfo, i; ++ const struct bpf_line_info *linfo; ++ void **jited_linfo; ++ ++ if (!prog->aux->jited_linfo) ++ /* Userspace did not provide linfo */ ++ return; ++ ++ linfo_idx = prog->aux->linfo_idx; ++ linfo = &prog->aux->linfo[linfo_idx]; ++ insn_start = linfo[0].insn_off; ++ insn_end = insn_start + prog->len; ++ ++ jited_linfo = &prog->aux->jited_linfo[linfo_idx]; ++ jited_linfo[0] = prog->bpf_func; ++ ++ nr_linfo = prog->aux->nr_linfo - linfo_idx; ++ ++ for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++) ++ /* The verifier ensures that linfo[i].insn_off is ++ * strictly increasing ++ */ ++ jited_linfo[i] = prog->bpf_func + ++ insn_to_jit_off[linfo[i].insn_off - insn_start - 1]; ++} ++ ++void bpf_prog_free_linfo(struct bpf_prog *prog) ++{ ++ bpf_prog_free_jited_linfo(prog); ++ kvfree(prog->aux->linfo); ++} ++ + struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, + gfp_t gfp_extra_flags) + { +- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | +- gfp_extra_flags; ++ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; + struct bpf_prog *fp; ++ u32 pages, delta; ++ int ret; + + BUG_ON(fp_old == NULL); + + size = round_up(size, PAGE_SIZE); +- if (size <= fp_old->pages * PAGE_SIZE) ++ pages = size / PAGE_SIZE; ++ if (pages <= fp_old->pages) + return fp_old; + +- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); +- if (fp != NULL) { +- kmemcheck_annotate_bitfield(fp, meta); ++ delta = pages - fp_old->pages; ++ ret = __bpf_prog_charge(fp_old->aux->user, delta); ++ if (ret) ++ return NULL; + ++ fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); ++ if (fp == NULL) { ++ __bpf_prog_uncharge(fp_old->aux->user, delta); ++ } else { + memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); +- fp->pages = size / PAGE_SIZE; ++ fp->pages = pages; + fp->aux->prog = fp; + + /* We keep fp->aux from fp_old around in the new +@@ -128,40 +249,578 @@ struct bpf_prog *bpf_prog_realloc(struct + + return fp; + } +-EXPORT_SYMBOL_GPL(bpf_prog_realloc); + + void __bpf_prog_free(struct bpf_prog *fp) + { +- kfree(fp->aux); ++ if (fp->aux) { ++ free_percpu(fp->aux->stats); ++ kfree(fp->aux); ++ } + vfree(fp); + } +-EXPORT_SYMBOL_GPL(__bpf_prog_free); ++ ++int bpf_prog_calc_tag(struct bpf_prog *fp) ++{ ++ const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64); ++ u32 raw_size = bpf_prog_tag_scratch_size(fp); ++ u32 digest[SHA_DIGEST_WORDS]; ++ u32 ws[SHA_WORKSPACE_WORDS]; ++ u32 i, bsize, psize, blocks; ++ struct bpf_insn *dst; ++ bool was_ld_map; ++ u8 *raw, *todo; ++ __be32 *result; ++ __be64 *bits; ++ ++ 
raw = vmalloc(raw_size); ++ if (!raw) ++ return -ENOMEM; ++ ++ sha_init(digest); ++ memset(ws, 0, sizeof(ws)); ++ ++ /* We need to take out the map fd for the digest calculation ++ * since they are unstable from user space side. ++ */ ++ dst = (void *)raw; ++ for (i = 0, was_ld_map = false; i < fp->len; i++) { ++ dst[i] = fp->insnsi[i]; ++ if (!was_ld_map && ++ dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) && ++ (dst[i].src_reg == BPF_PSEUDO_MAP_FD || ++ dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) { ++ was_ld_map = true; ++ dst[i].imm = 0; ++ } else if (was_ld_map && ++ dst[i].code == 0 && ++ dst[i].dst_reg == 0 && ++ dst[i].src_reg == 0 && ++ dst[i].off == 0) { ++ was_ld_map = false; ++ dst[i].imm = 0; ++ } else { ++ was_ld_map = false; ++ } ++ } ++ ++ psize = bpf_prog_insn_size(fp); ++ memset(&raw[psize], 0, raw_size - psize); ++ raw[psize++] = 0x80; ++ ++ bsize = round_up(psize, SHA_MESSAGE_BYTES); ++ blocks = bsize / SHA_MESSAGE_BYTES; ++ todo = raw; ++ if (bsize - psize >= sizeof(__be64)) { ++ bits = (__be64 *)(todo + bsize - sizeof(__be64)); ++ } else { ++ bits = (__be64 *)(todo + bsize + bits_offset); ++ blocks++; ++ } ++ *bits = cpu_to_be64((psize - 1) << 3); ++ ++ while (blocks--) { ++ sha_transform(digest, todo, ws); ++ todo += SHA_MESSAGE_BYTES; ++ } ++ ++ result = (__force __be32 *)digest; ++ for (i = 0; i < SHA_DIGEST_WORDS; i++) ++ result[i] = cpu_to_be32(digest[i]); ++ memcpy(fp->tag, result, sizeof(fp->tag)); ++ ++ vfree(raw); ++ return 0; ++} ++ ++static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, ++ s32 end_new, s32 curr, const bool probe_pass) ++{ ++ const s64 imm_min = S32_MIN, imm_max = S32_MAX; ++ s32 delta = end_new - end_old; ++ s64 imm = insn->imm; ++ ++ if (curr < pos && curr + imm + 1 >= end_old) ++ imm += delta; ++ else if (curr >= end_new && curr + imm + 1 < end_new) ++ imm -= delta; ++ if (imm < imm_min || imm > imm_max) ++ return -ERANGE; ++ if (!probe_pass) ++ insn->imm = imm; ++ return 0; ++} ++ ++static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, ++ s32 end_new, s32 curr, const bool probe_pass) ++{ ++ const s32 off_min = S16_MIN, off_max = S16_MAX; ++ s32 delta = end_new - end_old; ++ s32 off = insn->off; ++ ++ if (curr < pos && curr + off + 1 >= end_old) ++ off += delta; ++ else if (curr >= end_new && curr + off + 1 < end_new) ++ off -= delta; ++ if (off < off_min || off > off_max) ++ return -ERANGE; ++ if (!probe_pass) ++ insn->off = off; ++ return 0; ++} ++ ++static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, ++ s32 end_new, const bool probe_pass) ++{ ++ u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0); ++ struct bpf_insn *insn = prog->insnsi; ++ int ret = 0; ++ ++ for (i = 0; i < insn_cnt; i++, insn++) { ++ u8 code; ++ ++ /* In the probing pass we still operate on the original, ++ * unpatched image in order to check overflows before we ++ * do any other adjustments. Therefore skip the patchlet. ++ */ ++ if (probe_pass && i == pos) { ++ i = end_new; ++ insn = prog->insnsi + end_old; ++ } ++ code = insn->code; ++ if ((BPF_CLASS(code) != BPF_JMP && ++ BPF_CLASS(code) != BPF_JMP32) || ++ BPF_OP(code) == BPF_EXIT) ++ continue; ++ /* Adjust offset of jmps if we cross patch boundaries. 
*/ ++ if (BPF_OP(code) == BPF_CALL) { ++ if (insn->src_reg != BPF_PSEUDO_CALL) ++ continue; ++ ret = bpf_adj_delta_to_imm(insn, pos, end_old, ++ end_new, i, probe_pass); ++ } else { ++ ret = bpf_adj_delta_to_off(insn, pos, end_old, ++ end_new, i, probe_pass); ++ } ++ if (ret) ++ break; ++ } ++ ++ return ret; ++} ++ ++static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta) ++{ ++ struct bpf_line_info *linfo; ++ u32 i, nr_linfo; ++ ++ nr_linfo = prog->aux->nr_linfo; ++ if (!nr_linfo || !delta) ++ return; ++ ++ linfo = prog->aux->linfo; ++ ++ for (i = 0; i < nr_linfo; i++) ++ if (off < linfo[i].insn_off) ++ break; ++ ++ /* Push all off < linfo[i].insn_off by delta */ ++ for (; i < nr_linfo; i++) ++ linfo[i].insn_off += delta; ++} ++ ++struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, ++ const struct bpf_insn *patch, u32 len) ++{ ++ u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; ++ const u32 cnt_max = S16_MAX; ++ struct bpf_prog *prog_adj; ++ int err; ++ ++ /* Since our patchlet doesn't expand the image, we're done. */ ++ if (insn_delta == 0) { ++ memcpy(prog->insnsi + off, patch, sizeof(*patch)); ++ return prog; ++ } ++ ++ insn_adj_cnt = prog->len + insn_delta; ++ ++ /* Reject anything that would potentially let the insn->off ++ * target overflow when we have excessive program expansions. ++ * We need to probe here before we do any reallocation where ++ * we afterwards may not fail anymore. ++ */ ++ if (insn_adj_cnt > cnt_max && ++ (err = bpf_adj_branches(prog, off, off + 1, off + len, true))) ++ return ERR_PTR(err); ++ ++ /* Several new instructions need to be inserted. Make room ++ * for them. Likely, there's no need for a new allocation as ++ * last page could have large enough tailroom. ++ */ ++ prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt), ++ GFP_USER); ++ if (!prog_adj) ++ return ERR_PTR(-ENOMEM); ++ ++ prog_adj->len = insn_adj_cnt; ++ ++ /* Patching happens in 3 steps: ++ * ++ * 1) Move over tail of insnsi from next instruction onwards, ++ * so we can patch the single target insn with one or more ++ * new ones (patching is always from 1 to n insns, n > 0). ++ * 2) Inject new instructions at the target location. ++ * 3) Adjust branch offsets if necessary. ++ */ ++ insn_rest = insn_adj_cnt - off - len; ++ ++ memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1, ++ sizeof(*patch) * insn_rest); ++ memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); ++ ++ /* We are guaranteed to not fail at this point, otherwise ++ * the ship has sailed to reverse to the original state. An ++ * overflow cannot happen at this point. 
++ */ ++ BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false)); ++ ++ bpf_adj_linfo(prog_adj, off, insn_delta); ++ ++ return prog_adj; ++} ++ ++int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) ++{ ++ /* Branch offsets can't overflow when program is shrinking, no need ++ * to call bpf_adj_branches(..., true) here ++ */ ++ memmove(prog->insnsi + off, prog->insnsi + off + cnt, ++ sizeof(struct bpf_insn) * (prog->len - off - cnt)); ++ prog->len -= cnt; ++ ++ return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false)); ++} ++ ++static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) ++{ ++ int i; ++ ++ for (i = 0; i < fp->aux->func_cnt; i++) ++ bpf_prog_kallsyms_del(fp->aux->func[i]); ++} ++ ++void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) ++{ ++ bpf_prog_kallsyms_del_subprogs(fp); ++ bpf_prog_kallsyms_del(fp); ++} + + #ifdef CONFIG_BPF_JIT ++/* All BPF JIT sysctl knobs here. */ ++int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); ++int bpf_jit_harden __read_mostly; ++int bpf_jit_kallsyms __read_mostly; ++long bpf_jit_limit __read_mostly; ++ ++static __always_inline void ++bpf_get_prog_addr_region(const struct bpf_prog *prog, ++ unsigned long *symbol_start, ++ unsigned long *symbol_end) ++{ ++ const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog); ++ unsigned long addr = (unsigned long)hdr; ++ ++ WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog)); ++ ++ *symbol_start = addr; ++ *symbol_end = addr + hdr->pages * PAGE_SIZE; ++} ++ ++void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) ++{ ++ const char *end = sym + KSYM_NAME_LEN; ++ const struct btf_type *type; ++ const char *func_name; ++ ++ BUILD_BUG_ON(sizeof("bpf_prog_") + ++ sizeof(prog->tag) * 2 + ++ /* name has been null terminated. ++ * We should need +1 for the '_' preceding ++ * the name. However, the null character ++ * is double counted between the name and the ++ * sizeof("bpf_prog_") above, so we omit ++ * the +1 here. 
++ */ ++ sizeof(prog->aux->name) > KSYM_NAME_LEN); ++ ++ sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); ++ sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); ++ ++ /* prog->aux->name will be ignored if full btf name is available */ ++ if (prog->aux->func_info_cnt) { ++ type = btf_type_by_id(prog->aux->btf, ++ prog->aux->func_info[prog->aux->func_idx].type_id); ++ func_name = btf_name_by_offset(prog->aux->btf, type->name_off); ++ snprintf(sym, (size_t)(end - sym), "_%s", func_name); ++ return; ++ } ++ ++ if (prog->aux->name[0]) ++ snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name); ++ else ++ *sym = 0; ++} ++ ++static __always_inline unsigned long ++bpf_get_prog_addr_start(struct latch_tree_node *n) ++{ ++ unsigned long symbol_start, symbol_end; ++ const struct bpf_prog_aux *aux; ++ ++ aux = container_of(n, struct bpf_prog_aux, ksym_tnode); ++ bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); ++ ++ return symbol_start; ++} ++ ++static __always_inline bool bpf_tree_less(struct latch_tree_node *a, ++ struct latch_tree_node *b) ++{ ++ return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b); ++} ++ ++static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n) ++{ ++ unsigned long val = (unsigned long)key; ++ unsigned long symbol_start, symbol_end; ++ const struct bpf_prog_aux *aux; ++ ++ aux = container_of(n, struct bpf_prog_aux, ksym_tnode); ++ bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); ++ ++ if (val < symbol_start) ++ return -1; ++ if (val >= symbol_end) ++ return 1; ++ ++ return 0; ++} ++ ++static const struct latch_tree_ops bpf_tree_ops = { ++ .less = bpf_tree_less, ++ .comp = bpf_tree_comp, ++}; ++ ++static DEFINE_SPINLOCK(bpf_lock); ++static LIST_HEAD(bpf_kallsyms); ++static struct latch_tree_root bpf_tree __cacheline_aligned; ++ ++static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) ++{ ++ WARN_ON_ONCE(!list_empty(&aux->ksym_lnode)); ++ list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms); ++ latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); ++} ++ ++static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux) ++{ ++ if (list_empty(&aux->ksym_lnode)) ++ return; ++ ++ latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); ++ list_del_rcu(&aux->ksym_lnode); ++} ++ ++static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) ++{ ++ return fp->jited && !bpf_prog_was_classic(fp); ++} ++ ++static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) ++{ ++ return list_empty(&fp->aux->ksym_lnode) || ++ fp->aux->ksym_lnode.prev == LIST_POISON2; ++} ++ ++void bpf_prog_kallsyms_add(struct bpf_prog *fp) ++{ ++ if (!bpf_prog_kallsyms_candidate(fp) || ++ !capable(CAP_SYS_ADMIN)) ++ return; ++ ++ spin_lock_bh(&bpf_lock); ++ bpf_prog_ksym_node_add(fp->aux); ++ spin_unlock_bh(&bpf_lock); ++} ++ ++void bpf_prog_kallsyms_del(struct bpf_prog *fp) ++{ ++ if (!bpf_prog_kallsyms_candidate(fp)) ++ return; ++ ++ spin_lock_bh(&bpf_lock); ++ bpf_prog_ksym_node_del(fp->aux); ++ spin_unlock_bh(&bpf_lock); ++} ++ ++static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr) ++{ ++ struct latch_tree_node *n; ++ ++ if (!bpf_jit_kallsyms_enabled()) ++ return NULL; ++ ++ n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops); ++ return n ? 
++ container_of(n, struct bpf_prog_aux, ksym_tnode)->prog : ++ NULL; ++} ++ ++const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, ++ unsigned long *off, char *sym) ++{ ++ unsigned long symbol_start, symbol_end; ++ struct bpf_prog *prog; ++ char *ret = NULL; ++ ++ rcu_read_lock(); ++ prog = bpf_prog_kallsyms_find(addr); ++ if (prog) { ++ bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end); ++ bpf_get_prog_name(prog, sym); ++ ++ ret = sym; ++ if (size) ++ *size = symbol_end - symbol_start; ++ if (off) ++ *off = addr - symbol_start; ++ } ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++bool is_bpf_text_address(unsigned long addr) ++{ ++ bool ret; ++ ++ rcu_read_lock(); ++ ret = bpf_prog_kallsyms_find(addr) != NULL; ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, ++ char *sym) ++{ ++ struct bpf_prog_aux *aux; ++ unsigned int it = 0; ++ int ret = -ERANGE; ++ ++ if (!bpf_jit_kallsyms_enabled()) ++ return ret; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) { ++ if (it++ != symnum) ++ continue; ++ ++ bpf_get_prog_name(aux->prog, sym); ++ ++ *value = (unsigned long)aux->prog->bpf_func; ++ *type = BPF_SYM_ELF_TYPE; ++ ++ ret = 0; ++ break; ++ } ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++static atomic_long_t bpf_jit_current; ++ ++/* Can be overridden by an arch's JIT compiler if it has a custom, ++ * dedicated BPF backend memory area, or if neither of the two ++ * below apply. ++ */ ++u64 __weak bpf_jit_alloc_exec_limit(void) ++{ ++#if defined(MODULES_VADDR) ++ return MODULES_END - MODULES_VADDR; ++#else ++ return VMALLOC_END - VMALLOC_START; ++#endif ++} ++ ++static int __init bpf_jit_charge_init(void) ++{ ++ /* Only used as heuristic here to derive limit. */ ++ bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, ++ PAGE_SIZE), LONG_MAX); ++ return 0; ++} ++pure_initcall(bpf_jit_charge_init); ++ ++static int bpf_jit_charge_modmem(u32 pages) ++{ ++ if (atomic_long_add_return(pages, &bpf_jit_current) > ++ (bpf_jit_limit >> PAGE_SHIFT)) { ++ if (!capable(CAP_SYS_ADMIN)) { ++ atomic_long_sub(pages, &bpf_jit_current); ++ return -EPERM; ++ } ++ } ++ ++ return 0; ++} ++ ++static void bpf_jit_uncharge_modmem(u32 pages) ++{ ++ atomic_long_sub(pages, &bpf_jit_current); ++} ++ ++void *__weak bpf_jit_alloc_exec(unsigned long size) ++{ ++ return module_alloc(size); ++} ++ ++void __weak bpf_jit_free_exec(void *addr) ++{ ++ module_memfree(addr); ++} ++ + struct bpf_binary_header * + bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns) + { + struct bpf_binary_header *hdr; +- unsigned int size, hole, start; ++ u32 size, hole, start, pages; + + /* Most of BPF filters are really small, but if some of them + * fill a page, allow at least 128 extra bytes to insert a + * random section of illegal instructions. + */ + size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); +- hdr = module_alloc(size); +- if (hdr == NULL) ++ pages = size / PAGE_SIZE; ++ ++ if (bpf_jit_charge_modmem(pages)) ++ return NULL; ++ hdr = bpf_jit_alloc_exec(size); ++ if (!hdr) { ++ bpf_jit_uncharge_modmem(pages); + return NULL; ++ } + + /* Fill space with illegal/arch-dep instructions. 
*/ + bpf_fill_ill_insns(hdr, size); + +- hdr->pages = size / PAGE_SIZE; ++ hdr->pages = pages; + hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), + PAGE_SIZE - sizeof(*hdr)); +- start = (prandom_u32() % hole) & ~(alignment - 1); ++ start = (get_random_int() % hole) & ~(alignment - 1); + + /* Leave a random number of instructions before BPF code. */ + *image_ptr = &hdr->image[start]; +@@ -171,13 +830,301 @@ bpf_jit_binary_alloc(unsigned int progle + + void bpf_jit_binary_free(struct bpf_binary_header *hdr) + { +- module_memfree(hdr); ++ u32 pages = hdr->pages; ++ ++ bpf_jit_free_exec(hdr); ++ bpf_jit_uncharge_modmem(pages); ++} ++ ++/* This symbol is only overridden by archs that have different ++ * requirements than the usual eBPF JITs, f.e. when they only ++ * implement cBPF JIT, do not set images read-only, etc. ++ */ ++void __weak bpf_jit_free(struct bpf_prog *fp) ++{ ++ if (fp->jited) { ++ struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); ++ ++ bpf_jit_binary_unlock_ro(hdr); ++ bpf_jit_binary_free(hdr); ++ ++ WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); ++ } ++ ++ bpf_prog_unlock_free(fp); ++} ++ ++int bpf_jit_get_func_addr(const struct bpf_prog *prog, ++ const struct bpf_insn *insn, bool extra_pass, ++ u64 *func_addr, bool *func_addr_fixed) ++{ ++ s16 off = insn->off; ++ s32 imm = insn->imm; ++ u8 *addr; ++ ++ *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL; ++ if (!*func_addr_fixed) { ++ /* Place-holder address till the last pass has collected ++ * all addresses for JITed subprograms in which case we ++ * can pick them up from prog->aux. ++ */ ++ if (!extra_pass) ++ addr = NULL; ++ else if (prog->aux->func && ++ off >= 0 && off < prog->aux->func_cnt) ++ addr = (u8 *)prog->aux->func[off]->bpf_func; ++ else ++ return -EINVAL; ++ } else { ++ /* Address of a BPF helper call. Since part of the core ++ * kernel, it's always at a fixed location. __bpf_call_base ++ * and the helper with imm relative to it are both in core ++ * kernel. ++ */ ++ addr = (u8 *)__bpf_call_base + imm; ++ } ++ ++ *func_addr = (unsigned long)addr; ++ return 0; ++} ++ ++static int bpf_jit_blind_insn(const struct bpf_insn *from, ++ const struct bpf_insn *aux, ++ struct bpf_insn *to_buff, ++ bool emit_zext) ++{ ++ struct bpf_insn *to = to_buff; ++ u32 imm_rnd = get_random_int(); ++ s16 off; ++ ++ BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); ++ BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); ++ ++ /* Constraints on AX register: ++ * ++ * AX register is inaccessible from user space. It is mapped in ++ * all JITs, and used here for constant blinding rewrites. It is ++ * typically "stateless" meaning its contents are only valid within ++ * the executed instruction, but not across several instructions. ++ * There are a few exceptions however which are further detailed ++ * below. ++ * ++ * Constant blinding is only used by JITs, not in the interpreter. ++ * The interpreter uses AX in some occasions as a local temporary ++ * register e.g. in DIV or MOD instructions. ++ * ++ * In restricted circumstances, the verifier can also use the AX ++ * register for rewrites as long as they do not interfere with ++ * the above cases! 
++ */ ++ if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX) ++ goto out; ++ ++ if (from->imm == 0 && ++ (from->code == (BPF_ALU | BPF_MOV | BPF_K) || ++ from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { ++ *to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg); ++ goto out; ++ } ++ ++ switch (from->code) { ++ case BPF_ALU | BPF_ADD | BPF_K: ++ case BPF_ALU | BPF_SUB | BPF_K: ++ case BPF_ALU | BPF_AND | BPF_K: ++ case BPF_ALU | BPF_OR | BPF_K: ++ case BPF_ALU | BPF_XOR | BPF_K: ++ case BPF_ALU | BPF_MUL | BPF_K: ++ case BPF_ALU | BPF_MOV | BPF_K: ++ case BPF_ALU | BPF_DIV | BPF_K: ++ case BPF_ALU | BPF_MOD | BPF_K: ++ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); ++ *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); ++ *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX); ++ break; ++ ++ case BPF_ALU64 | BPF_ADD | BPF_K: ++ case BPF_ALU64 | BPF_SUB | BPF_K: ++ case BPF_ALU64 | BPF_AND | BPF_K: ++ case BPF_ALU64 | BPF_OR | BPF_K: ++ case BPF_ALU64 | BPF_XOR | BPF_K: ++ case BPF_ALU64 | BPF_MUL | BPF_K: ++ case BPF_ALU64 | BPF_MOV | BPF_K: ++ case BPF_ALU64 | BPF_DIV | BPF_K: ++ case BPF_ALU64 | BPF_MOD | BPF_K: ++ *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); ++ *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); ++ *to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX); ++ break; ++ ++ case BPF_JMP | BPF_JEQ | BPF_K: ++ case BPF_JMP | BPF_JNE | BPF_K: ++ case BPF_JMP | BPF_JGT | BPF_K: ++ case BPF_JMP | BPF_JLT | BPF_K: ++ case BPF_JMP | BPF_JGE | BPF_K: ++ case BPF_JMP | BPF_JLE | BPF_K: ++ case BPF_JMP | BPF_JSGT | BPF_K: ++ case BPF_JMP | BPF_JSLT | BPF_K: ++ case BPF_JMP | BPF_JSGE | BPF_K: ++ case BPF_JMP | BPF_JSLE | BPF_K: ++ case BPF_JMP | BPF_JSET | BPF_K: ++ /* Accommodate for extra offset in case of a backjump. */ ++ off = from->off; ++ if (off < 0) ++ off -= 2; ++ *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); ++ *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); ++ *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); ++ break; ++ ++ case BPF_JMP32 | BPF_JEQ | BPF_K: ++ case BPF_JMP32 | BPF_JNE | BPF_K: ++ case BPF_JMP32 | BPF_JGT | BPF_K: ++ case BPF_JMP32 | BPF_JLT | BPF_K: ++ case BPF_JMP32 | BPF_JGE | BPF_K: ++ case BPF_JMP32 | BPF_JLE | BPF_K: ++ case BPF_JMP32 | BPF_JSGT | BPF_K: ++ case BPF_JMP32 | BPF_JSLT | BPF_K: ++ case BPF_JMP32 | BPF_JSGE | BPF_K: ++ case BPF_JMP32 | BPF_JSLE | BPF_K: ++ case BPF_JMP32 | BPF_JSET | BPF_K: ++ /* Accommodate for extra offset in case of a backjump. */ ++ off = from->off; ++ if (off < 0) ++ off -= 2; ++ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); ++ *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); ++ *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX, ++ off); ++ break; ++ ++ case BPF_LD | BPF_IMM | BPF_DW: ++ *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); ++ *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); ++ *to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); ++ *to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX); ++ break; ++ case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. 
*/ ++ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm); ++ *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); ++ if (emit_zext) ++ *to++ = BPF_ZEXT_REG(BPF_REG_AX); ++ *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX); ++ break; ++ ++ case BPF_ST | BPF_MEM | BPF_DW: ++ case BPF_ST | BPF_MEM | BPF_W: ++ case BPF_ST | BPF_MEM | BPF_H: ++ case BPF_ST | BPF_MEM | BPF_B: ++ *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); ++ *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); ++ *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off); ++ break; ++ } ++out: ++ return to - to_buff; ++} ++ ++static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, ++ gfp_t gfp_extra_flags) ++{ ++ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; ++ struct bpf_prog *fp; ++ ++ fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); ++ if (fp != NULL) { ++ /* aux->prog still points to the fp_other one, so ++ * when promoting the clone to the real program, ++ * this still needs to be adapted. ++ */ ++ memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE); ++ } ++ ++ return fp; ++} ++ ++static void bpf_prog_clone_free(struct bpf_prog *fp) ++{ ++ /* aux was stolen by the other clone, so we cannot free ++ * it from this path! It will be freed eventually by the ++ * other program on release. ++ * ++ * At this point, we don't need a deferred release since ++ * clone is guaranteed to not be locked. ++ */ ++ fp->aux = NULL; ++ __bpf_prog_free(fp); ++} ++ ++void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other) ++{ ++ /* We have to repoint aux->prog to self, as we don't ++ * know whether fp here is the clone or the original. ++ */ ++ fp->aux->prog = fp; ++ bpf_prog_clone_free(fp_other); ++} ++ ++struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) ++{ ++ struct bpf_insn insn_buff[16], aux[2]; ++ struct bpf_prog *clone, *tmp; ++ int insn_delta, insn_cnt; ++ struct bpf_insn *insn; ++ int i, rewritten; ++ ++ if (!bpf_jit_blinding_enabled(prog) || prog->blinded) ++ return prog; ++ ++ clone = bpf_prog_clone_create(prog, GFP_USER); ++ if (!clone) ++ return ERR_PTR(-ENOMEM); ++ ++ insn_cnt = clone->len; ++ insn = clone->insnsi; ++ ++ for (i = 0; i < insn_cnt; i++, insn++) { ++ /* We temporarily need to hold the original ld64 insn ++ * so that we can still access the first part in the ++ * second blinding run. ++ */ ++ if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) && ++ insn[1].code == 0) ++ memcpy(aux, insn, sizeof(aux)); ++ ++ rewritten = bpf_jit_blind_insn(insn, aux, insn_buff, ++ clone->aux->verifier_zext); ++ if (!rewritten) ++ continue; ++ ++ tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten); ++ if (IS_ERR(tmp)) { ++ /* Patching may have repointed aux->prog during ++ * realloc from the original one, so we need to ++ * fix it up here on error. ++ */ ++ bpf_jit_prog_release_other(prog, clone); ++ return tmp; ++ } ++ ++ clone = tmp; ++ insn_delta = rewritten - 1; ++ ++ /* Walk new program and skip insns we just inserted. */ ++ insn = clone->insnsi + i + insn_delta; ++ insn_cnt += insn_delta; ++ i += insn_delta; ++ } ++ ++ clone->blinded = 1; ++ return clone; + } + #endif /* CONFIG_BPF_JIT */ + + /* Base function for offset calculation. Needs to go into .text section, + * therefore keeping it non-static as well; will also be used by JITs +- * anyway later on, so do not let the compiler omit it. ++ * anyway later on, so do not let the compiler omit it. 
This also needs ++ * to go into kallsyms for correlation from e.g. bpftool, so naming ++ * must not change. + */ + noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) + { +@@ -185,157 +1132,243 @@ noinline u64 __bpf_call_base(u64 r1, u64 + } + EXPORT_SYMBOL_GPL(__bpf_call_base); + ++/* All UAPI available opcodes. */ ++#define BPF_INSN_MAP(INSN_2, INSN_3) \ ++ /* 32 bit ALU operations. */ \ ++ /* Register based. */ \ ++ INSN_3(ALU, ADD, X), \ ++ INSN_3(ALU, SUB, X), \ ++ INSN_3(ALU, AND, X), \ ++ INSN_3(ALU, OR, X), \ ++ INSN_3(ALU, LSH, X), \ ++ INSN_3(ALU, RSH, X), \ ++ INSN_3(ALU, XOR, X), \ ++ INSN_3(ALU, MUL, X), \ ++ INSN_3(ALU, MOV, X), \ ++ INSN_3(ALU, ARSH, X), \ ++ INSN_3(ALU, DIV, X), \ ++ INSN_3(ALU, MOD, X), \ ++ INSN_2(ALU, NEG), \ ++ INSN_3(ALU, END, TO_BE), \ ++ INSN_3(ALU, END, TO_LE), \ ++ /* Immediate based. */ \ ++ INSN_3(ALU, ADD, K), \ ++ INSN_3(ALU, SUB, K), \ ++ INSN_3(ALU, AND, K), \ ++ INSN_3(ALU, OR, K), \ ++ INSN_3(ALU, LSH, K), \ ++ INSN_3(ALU, RSH, K), \ ++ INSN_3(ALU, XOR, K), \ ++ INSN_3(ALU, MUL, K), \ ++ INSN_3(ALU, MOV, K), \ ++ INSN_3(ALU, ARSH, K), \ ++ INSN_3(ALU, DIV, K), \ ++ INSN_3(ALU, MOD, K), \ ++ /* 64 bit ALU operations. */ \ ++ /* Register based. */ \ ++ INSN_3(ALU64, ADD, X), \ ++ INSN_3(ALU64, SUB, X), \ ++ INSN_3(ALU64, AND, X), \ ++ INSN_3(ALU64, OR, X), \ ++ INSN_3(ALU64, LSH, X), \ ++ INSN_3(ALU64, RSH, X), \ ++ INSN_3(ALU64, XOR, X), \ ++ INSN_3(ALU64, MUL, X), \ ++ INSN_3(ALU64, MOV, X), \ ++ INSN_3(ALU64, ARSH, X), \ ++ INSN_3(ALU64, DIV, X), \ ++ INSN_3(ALU64, MOD, X), \ ++ INSN_2(ALU64, NEG), \ ++ /* Immediate based. */ \ ++ INSN_3(ALU64, ADD, K), \ ++ INSN_3(ALU64, SUB, K), \ ++ INSN_3(ALU64, AND, K), \ ++ INSN_3(ALU64, OR, K), \ ++ INSN_3(ALU64, LSH, K), \ ++ INSN_3(ALU64, RSH, K), \ ++ INSN_3(ALU64, XOR, K), \ ++ INSN_3(ALU64, MUL, K), \ ++ INSN_3(ALU64, MOV, K), \ ++ INSN_3(ALU64, ARSH, K), \ ++ INSN_3(ALU64, DIV, K), \ ++ INSN_3(ALU64, MOD, K), \ ++ /* Call instruction. */ \ ++ INSN_2(JMP, CALL), \ ++ /* Exit instruction. */ \ ++ INSN_2(JMP, EXIT), \ ++ /* 32-bit Jump instructions. */ \ ++ /* Register based. */ \ ++ INSN_3(JMP32, JEQ, X), \ ++ INSN_3(JMP32, JNE, X), \ ++ INSN_3(JMP32, JGT, X), \ ++ INSN_3(JMP32, JLT, X), \ ++ INSN_3(JMP32, JGE, X), \ ++ INSN_3(JMP32, JLE, X), \ ++ INSN_3(JMP32, JSGT, X), \ ++ INSN_3(JMP32, JSLT, X), \ ++ INSN_3(JMP32, JSGE, X), \ ++ INSN_3(JMP32, JSLE, X), \ ++ INSN_3(JMP32, JSET, X), \ ++ /* Immediate based. */ \ ++ INSN_3(JMP32, JEQ, K), \ ++ INSN_3(JMP32, JNE, K), \ ++ INSN_3(JMP32, JGT, K), \ ++ INSN_3(JMP32, JLT, K), \ ++ INSN_3(JMP32, JGE, K), \ ++ INSN_3(JMP32, JLE, K), \ ++ INSN_3(JMP32, JSGT, K), \ ++ INSN_3(JMP32, JSLT, K), \ ++ INSN_3(JMP32, JSGE, K), \ ++ INSN_3(JMP32, JSLE, K), \ ++ INSN_3(JMP32, JSET, K), \ ++ /* Jump instructions. */ \ ++ /* Register based. */ \ ++ INSN_3(JMP, JEQ, X), \ ++ INSN_3(JMP, JNE, X), \ ++ INSN_3(JMP, JGT, X), \ ++ INSN_3(JMP, JLT, X), \ ++ INSN_3(JMP, JGE, X), \ ++ INSN_3(JMP, JLE, X), \ ++ INSN_3(JMP, JSGT, X), \ ++ INSN_3(JMP, JSLT, X), \ ++ INSN_3(JMP, JSGE, X), \ ++ INSN_3(JMP, JSLE, X), \ ++ INSN_3(JMP, JSET, X), \ ++ /* Immediate based. */ \ ++ INSN_3(JMP, JEQ, K), \ ++ INSN_3(JMP, JNE, K), \ ++ INSN_3(JMP, JGT, K), \ ++ INSN_3(JMP, JLT, K), \ ++ INSN_3(JMP, JGE, K), \ ++ INSN_3(JMP, JLE, K), \ ++ INSN_3(JMP, JSGT, K), \ ++ INSN_3(JMP, JSLT, K), \ ++ INSN_3(JMP, JSGE, K), \ ++ INSN_3(JMP, JSLE, K), \ ++ INSN_3(JMP, JSET, K), \ ++ INSN_2(JMP, JA), \ ++ /* Store instructions. */ \ ++ /* Register based. 
*/ \ ++ INSN_3(STX, MEM, B), \ ++ INSN_3(STX, MEM, H), \ ++ INSN_3(STX, MEM, W), \ ++ INSN_3(STX, MEM, DW), \ ++ INSN_3(STX, XADD, W), \ ++ INSN_3(STX, XADD, DW), \ ++ /* Immediate based. */ \ ++ INSN_3(ST, MEM, B), \ ++ INSN_3(ST, MEM, H), \ ++ INSN_3(ST, MEM, W), \ ++ INSN_3(ST, MEM, DW), \ ++ /* Load instructions. */ \ ++ /* Register based. */ \ ++ INSN_3(LDX, MEM, B), \ ++ INSN_3(LDX, MEM, H), \ ++ INSN_3(LDX, MEM, W), \ ++ INSN_3(LDX, MEM, DW), \ ++ /* Immediate based. */ \ ++ INSN_3(LD, IMM, DW) ++ ++bool bpf_opcode_in_insntable(u8 code) ++{ ++#define BPF_INSN_2_TBL(x, y) [BPF_##x | BPF_##y] = true ++#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true ++ static const bool public_insntable[256] = { ++ [0 ... 255] = false, ++ /* Now overwrite non-defaults ... */ ++ BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL), ++ /* UAPI exposed, but rewritten opcodes. cBPF carry-over. */ ++ [BPF_LD | BPF_ABS | BPF_B] = true, ++ [BPF_LD | BPF_ABS | BPF_H] = true, ++ [BPF_LD | BPF_ABS | BPF_W] = true, ++ [BPF_LD | BPF_IND | BPF_B] = true, ++ [BPF_LD | BPF_IND | BPF_H] = true, ++ [BPF_LD | BPF_IND | BPF_W] = true, ++ }; ++#undef BPF_INSN_3_TBL ++#undef BPF_INSN_2_TBL ++ return public_insntable[code]; ++} ++ ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON + /** + * __bpf_prog_run - run eBPF program on a given context +- * @ctx: is the data we are operating on ++ * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers + * @insn: is the array of eBPF instructions ++ * @stack: is the eBPF storage stack + * + * Decode and execute eBPF instructions. + */ +-static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) ++static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) + { +- u64 stack[MAX_BPF_STACK / sizeof(u64)]; +- u64 regs[MAX_BPF_REG], tmp; +- static const void *jumptable[256] = { ++#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y ++#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z ++ static const void * const jumptable[256] = { + [0 ... 255] = &&default_label, + /* Now overwrite non-defaults ... 
*/ +- /* 32 bit ALU operations */ +- [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X, +- [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K, +- [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X, +- [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K, +- [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X, +- [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K, +- [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X, +- [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K, +- [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X, +- [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K, +- [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X, +- [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K, +- [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X, +- [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K, +- [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X, +- [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K, +- [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X, +- [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, +- [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X, +- [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K, +- [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X, +- [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K, +- [BPF_ALU | BPF_NEG] = &&ALU_NEG, +- [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE, +- [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE, +- /* 64 bit ALU operations */ +- [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X, +- [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K, +- [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X, +- [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K, +- [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X, +- [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K, +- [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X, +- [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K, +- [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X, +- [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K, +- [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X, +- [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K, +- [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X, +- [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K, +- [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X, +- [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K, +- [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X, +- [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, +- [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X, +- [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K, +- [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X, +- [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K, +- [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X, +- [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K, +- [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, +- /* Call instruction */ +- [BPF_JMP | BPF_CALL] = &&JMP_CALL, +- [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL, +- /* Jumps */ +- [BPF_JMP | BPF_JA] = &&JMP_JA, +- [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, +- [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K, +- [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X, +- [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, +- [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, +- [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, +- [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, +- [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, +- [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, +- [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, +- [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, +- [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, +- [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, +- [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, +- /* Program return */ +- [BPF_JMP | BPF_EXIT] = &&JMP_EXIT, +- /* Store instructions */ +- [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B, +- [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H, +- [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W, +- [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW, 
+- [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W, +- [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW, +- [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B, +- [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H, +- [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W, +- [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW, +- /* Load instructions */ +- [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B, +- [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H, +- [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W, +- [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW, +- [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W, +- [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H, +- [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B, +- [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, +- [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, +- [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, +- [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, ++ BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL), ++ /* Non-UAPI available opcodes. */ ++ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, ++ [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, + }; ++#undef BPF_INSN_3_LBL ++#undef BPF_INSN_2_LBL + u32 tail_call_cnt = 0; +- void *ptr; +- int off; + + #define CONT ({ insn++; goto select_insn; }) + #define CONT_JMP ({ insn++; goto select_insn; }) + +- FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; +- ARG1 = (u64) (unsigned long) ctx; +- +- /* Registers used in classic BPF programs need to be reset first. */ +- regs[BPF_REG_A] = 0; +- regs[BPF_REG_X] = 0; +- + select_insn: + goto *jumptable[insn->code]; + +- /* ALU */ +-#define ALU(OPCODE, OP) \ +- ALU64_##OPCODE##_X: \ +- DST = DST OP SRC; \ +- CONT; \ +- ALU_##OPCODE##_X: \ +- DST = (u32) DST OP (u32) SRC; \ +- CONT; \ +- ALU64_##OPCODE##_K: \ +- DST = DST OP IMM; \ +- CONT; \ +- ALU_##OPCODE##_K: \ +- DST = (u32) DST OP (u32) IMM; \ ++ /* Explicitly mask the register-based shift amounts with 63 or 31 ++ * to avoid undefined behavior. Normally this won't affect the ++ * generated code, for example, in case of native 64 bit archs such ++ * as x86-64 or arm64, the compiler is optimizing the AND away for ++ * the interpreter. In case of JITs, each of the JIT backends compiles ++ * the BPF shift operations to machine instructions which produce ++ * implementation-defined results in such a case; the resulting ++ * contents of the register may be arbitrary, but program behaviour ++ * as a whole remains defined. In other words, in case of JIT backends, ++ * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation. 
++ */ ++ /* ALU (shifts) */ ++#define SHT(OPCODE, OP) \ ++ ALU64_##OPCODE##_X: \ ++ DST = DST OP (SRC & 63); \ ++ CONT; \ ++ ALU_##OPCODE##_X: \ ++ DST = (u32) DST OP ((u32) SRC & 31); \ ++ CONT; \ ++ ALU64_##OPCODE##_K: \ ++ DST = DST OP IMM; \ ++ CONT; \ ++ ALU_##OPCODE##_K: \ ++ DST = (u32) DST OP (u32) IMM; \ ++ CONT; ++ /* ALU (rest) */ ++#define ALU(OPCODE, OP) \ ++ ALU64_##OPCODE##_X: \ ++ DST = DST OP SRC; \ ++ CONT; \ ++ ALU_##OPCODE##_X: \ ++ DST = (u32) DST OP (u32) SRC; \ ++ CONT; \ ++ ALU64_##OPCODE##_K: \ ++ DST = DST OP IMM; \ ++ CONT; \ ++ ALU_##OPCODE##_K: \ ++ DST = (u32) DST OP (u32) IMM; \ + CONT; +- + ALU(ADD, +) + ALU(SUB, -) + ALU(AND, &) + ALU(OR, |) +- ALU(LSH, <<) +- ALU(RSH, >>) + ALU(XOR, ^) + ALU(MUL, *) ++ SHT(LSH, <<) ++ SHT(RSH, >>) ++#undef SHT + #undef ALU + ALU_NEG: + DST = (u32) -DST; +@@ -359,51 +1392,49 @@ select_insn: + DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32; + insn++; + CONT; ++ ALU_ARSH_X: ++ DST = (u64) (u32) (((s32) DST) >> (SRC & 31)); ++ CONT; ++ ALU_ARSH_K: ++ DST = (u64) (u32) (((s32) DST) >> IMM); ++ CONT; + ALU64_ARSH_X: +- (*(s64 *) &DST) >>= SRC; ++ (*(s64 *) &DST) >>= (SRC & 63); + CONT; + ALU64_ARSH_K: + (*(s64 *) &DST) >>= IMM; + CONT; + ALU64_MOD_X: +- if (unlikely(SRC == 0)) +- return 0; +- div64_u64_rem(DST, SRC, &tmp); +- DST = tmp; ++ div64_u64_rem(DST, SRC, &AX); ++ DST = AX; + CONT; + ALU_MOD_X: +- if (unlikely(SRC == 0)) +- return 0; +- tmp = (u32) DST; +- DST = do_div(tmp, (u32) SRC); ++ AX = (u32) DST; ++ DST = do_div(AX, (u32) SRC); + CONT; + ALU64_MOD_K: +- div64_u64_rem(DST, IMM, &tmp); +- DST = tmp; ++ div64_u64_rem(DST, IMM, &AX); ++ DST = AX; + CONT; + ALU_MOD_K: +- tmp = (u32) DST; +- DST = do_div(tmp, (u32) IMM); ++ AX = (u32) DST; ++ DST = do_div(AX, (u32) IMM); + CONT; + ALU64_DIV_X: +- if (unlikely(SRC == 0)) +- return 0; + DST = div64_u64(DST, SRC); + CONT; + ALU_DIV_X: +- if (unlikely(SRC == 0)) +- return 0; +- tmp = (u32) DST; +- do_div(tmp, (u32) SRC); +- DST = (u32) tmp; ++ AX = (u32) DST; ++ do_div(AX, (u32) SRC); ++ DST = (u32) AX; + CONT; + ALU64_DIV_K: + DST = div64_u64(DST, IMM); + CONT; + ALU_DIV_K: +- tmp = (u32) DST; +- do_div(tmp, (u32) IMM); +- DST = (u32) tmp; ++ AX = (u32) DST; ++ do_div(AX, (u32) IMM); ++ DST = (u32) AX; + CONT; + ALU_END_TO_BE: + switch (IMM) { +@@ -442,22 +1473,28 @@ select_insn: + BPF_R4, BPF_R5); + CONT; + ++ JMP_CALL_ARGS: ++ BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2, ++ BPF_R3, BPF_R4, ++ BPF_R5, ++ insn + insn->off + 1); ++ CONT; ++ + JMP_TAIL_CALL: { + struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog; +- u64 index = BPF_R3; ++ u32 index = BPF_R3; + + if (unlikely(index >= array->map.max_entries)) + goto out; +- + if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + goto out; + + tail_call_cnt++; + + prog = READ_ONCE(array->ptrs[index]); +- if (unlikely(!prog)) ++ if (!prog) + goto out; + + /* ARG1 at this point is guaranteed to point to CTX from +@@ -470,97 +1507,49 @@ select_insn: + out: + CONT; + } +- /* JMP */ + JMP_JA: + insn += insn->off; + CONT; +- JMP_JEQ_X: +- if (DST == SRC) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JEQ_K: +- if (DST == IMM) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JNE_X: +- if (DST != SRC) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JNE_K: +- if (DST != IMM) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JGT_X: +- if (DST > SRC) { +- insn += 
insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JGT_K: +- if (DST > IMM) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JGE_X: +- if (DST >= SRC) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JGE_K: +- if (DST >= IMM) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JSGT_X: +- if (((s64) DST) > ((s64) SRC)) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JSGT_K: +- if (((s64) DST) > ((s64) IMM)) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JSGE_X: +- if (((s64) DST) >= ((s64) SRC)) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JSGE_K: +- if (((s64) DST) >= ((s64) IMM)) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JSET_X: +- if (DST & SRC) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; +- JMP_JSET_K: +- if (DST & IMM) { +- insn += insn->off; +- CONT_JMP; +- } +- CONT; + JMP_EXIT: + return BPF_R0; +- ++ /* JMP */ ++#define COND_JMP(SIGN, OPCODE, CMP_OP) \ ++ JMP_##OPCODE##_X: \ ++ if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) { \ ++ insn += insn->off; \ ++ CONT_JMP; \ ++ } \ ++ CONT; \ ++ JMP32_##OPCODE##_X: \ ++ if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) { \ ++ insn += insn->off; \ ++ CONT_JMP; \ ++ } \ ++ CONT; \ ++ JMP_##OPCODE##_K: \ ++ if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) { \ ++ insn += insn->off; \ ++ CONT_JMP; \ ++ } \ ++ CONT; \ ++ JMP32_##OPCODE##_K: \ ++ if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) { \ ++ insn += insn->off; \ ++ CONT_JMP; \ ++ } \ ++ CONT; ++ COND_JMP(u, JEQ, ==) ++ COND_JMP(u, JNE, !=) ++ COND_JMP(u, JGT, >) ++ COND_JMP(u, JLT, <) ++ COND_JMP(u, JGE, >=) ++ COND_JMP(u, JLE, <=) ++ COND_JMP(u, JSET, &) ++ COND_JMP(s, JSGT, >) ++ COND_JMP(s, JSLT, <) ++ COND_JMP(s, JSGE, >=) ++ COND_JMP(s, JSLE, <=) ++#undef COND_JMP + /* STX and ST and LDX*/ + #define LDST(SIZEOP, SIZE) \ + STX_MEM_##SIZEOP: \ +@@ -586,77 +1575,108 @@ out: + atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) + (DST + insn->off)); + CONT; +- LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */ +- off = IMM; +-load_word: +- /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are +- * only appearing in the programs where ctx == +- * skb. All programs keep 'ctx' in regs[BPF_REG_CTX] +- * == BPF_R6, bpf_convert_filter() saves it in BPF_R6, +- * internal BPF verifier will check that BPF_R6 == +- * ctx. +- * +- * BPF_ABS and BPF_IND are wrappers of function calls, +- * so they scratch BPF_R1-BPF_R5 registers, preserve +- * BPF_R6-BPF_R9, and store return value into BPF_R0. +- * +- * Implicit input: +- * ctx == skb == BPF_R6 == CTX +- * +- * Explicit input: +- * SRC == any register +- * IMM == 32-bit immediate ++ ++ default_label: ++ /* If we ever reach this, we have a bug somewhere. Die hard here ++ * instead of just returning 0; we could be somewhere in a subprog, ++ * so execution could continue otherwise which we do /not/ want. + * +- * Output: +- * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness ++ * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable(). 
+ */ ++ pr_warn("BPF interpreter: unknown opcode %02x\n", insn->code); ++ BUG_ON(1); ++ return 0; ++} + +- ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); +- if (likely(ptr != NULL)) { +- BPF_R0 = get_unaligned_be32(ptr); +- CONT; +- } ++#define PROG_NAME(stack_size) __bpf_prog_run##stack_size ++#define DEFINE_BPF_PROG_RUN(stack_size) \ ++static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ ++{ \ ++ u64 stack[stack_size / sizeof(u64)]; \ ++ u64 regs[MAX_BPF_EXT_REG]; \ ++\ ++ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ ++ ARG1 = (u64) (unsigned long) ctx; \ ++ return ___bpf_prog_run(regs, insn, stack); \ ++} + +- return 0; +- LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ +- off = IMM; +-load_half: +- ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); +- if (likely(ptr != NULL)) { +- BPF_R0 = get_unaligned_be16(ptr); +- CONT; +- } ++#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size ++#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \ ++static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ ++ const struct bpf_insn *insn) \ ++{ \ ++ u64 stack[stack_size / sizeof(u64)]; \ ++ u64 regs[MAX_BPF_EXT_REG]; \ ++\ ++ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ ++ BPF_R1 = r1; \ ++ BPF_R2 = r2; \ ++ BPF_R3 = r3; \ ++ BPF_R4 = r4; \ ++ BPF_R5 = r5; \ ++ return ___bpf_prog_run(regs, insn, stack); \ ++} + +- return 0; +- LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ +- off = IMM; +-load_byte: +- ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); +- if (likely(ptr != NULL)) { +- BPF_R0 = *(u8 *)ptr; +- CONT; +- } ++#define EVAL1(FN, X) FN(X) ++#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y) ++#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y) ++#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y) ++#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y) ++#define EVAL6(FN, X, Y...) 
FN(X) EVAL5(FN, Y) ++ ++EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192); ++EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384); ++EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512); ++ ++EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192); ++EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384); ++EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512); ++ ++#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size), ++ ++static unsigned int (*interpreters[])(const void *ctx, ++ const struct bpf_insn *insn) = { ++EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) ++EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) ++EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) ++}; ++#undef PROG_NAME_LIST ++#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size), ++static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, ++ const struct bpf_insn *insn) = { ++EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) ++EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) ++EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) ++}; ++#undef PROG_NAME_LIST + +- return 0; +- LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */ +- off = IMM + SRC; +- goto load_word; +- LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */ +- off = IMM + SRC; +- goto load_half; +- LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */ +- off = IMM + SRC; +- goto load_byte; ++void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) ++{ ++ stack_depth = max_t(u32, stack_depth, 1); ++ insn->off = (s16) insn->imm; ++ insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] - ++ __bpf_call_base_args; ++ insn->code = BPF_JMP | BPF_CALL_ARGS; ++} + +- default_label: +- /* If we ever reach this, we have a bug somewhere. */ +- WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); +- return 0; ++#else ++static unsigned int __bpf_prog_ret0_warn(const void *ctx, ++ const struct bpf_insn *insn) ++{ ++ /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON ++ * is not working properly, so warn about it! ++ */ ++ WARN_ON_ONCE(1); ++ return 0; + } ++#endif + + bool bpf_prog_array_compatible(struct bpf_array *array, + const struct bpf_prog *fp) + { ++ if (fp->kprobe_override) ++ return false; ++ + if (!array->owner_prog_type) { + /* There's no owner yet where we could check for + * compatibility. +@@ -691,18 +1711,62 @@ static int bpf_check_tail_call(const str + return 0; + } + ++static void bpf_prog_select_func(struct bpf_prog *fp) ++{ ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON ++ u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); ++ ++ fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; ++#else ++ fp->bpf_func = __bpf_prog_ret0_warn; ++#endif ++} ++ + /** + * bpf_prog_select_runtime - select exec runtime for BPF program + * @fp: bpf_prog populated with internal BPF program ++ * @err: pointer to error variable + * + * Try to JIT eBPF program, if JIT is not available, use interpreter. + * The BPF program will be executed via BPF_PROG_RUN() macro. + */ +-int bpf_prog_select_runtime(struct bpf_prog *fp) ++struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) + { +- fp->bpf_func = (void *) __bpf_prog_run; ++ /* In case of BPF to BPF calls, verifier did all the prep ++ * work with regards to JITing, etc. ++ */ ++ if (fp->bpf_func) ++ goto finalize; ++ ++ bpf_prog_select_func(fp); ++ ++ /* eBPF JITs can rewrite the program in case constant ++ * blinding is active. 
However, in case of error during ++ * blinding, bpf_int_jit_compile() must always return a ++ * valid program, which in this case would simply not ++ * be JITed, but falls back to the interpreter. ++ */ ++ if (!bpf_prog_is_dev_bound(fp->aux)) { ++ *err = bpf_prog_alloc_jited_linfo(fp); ++ if (*err) ++ return fp; ++ ++ fp = bpf_int_jit_compile(fp); ++ if (!fp->jited) { ++ bpf_prog_free_jited_linfo(fp); ++#ifdef CONFIG_BPF_JIT_ALWAYS_ON ++ *err = -ENOTSUPP; ++ return fp; ++#endif ++ } else { ++ bpf_prog_free_unused_jited_linfo(fp); ++ } ++ } else { ++ *err = -EINVAL; ++ return fp; ++ } + +- bpf_int_jit_compile(fp); ++finalize: + bpf_prog_lock_ro(fp); + + /* The tail call compatibility check can only be done at +@@ -710,16 +1774,238 @@ int bpf_prog_select_runtime(struct bpf_p + * with JITed or non JITed program concatenations and not + * all eBPF JITs might immediately support all features. + */ +- return bpf_check_tail_call(fp); ++ *err = bpf_check_tail_call(fp); ++ ++ return fp; + } + EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); + ++static unsigned int __bpf_prog_ret1(const void *ctx, ++ const struct bpf_insn *insn) ++{ ++ return 1; ++} ++ ++static struct bpf_prog_dummy { ++ struct bpf_prog prog; ++} dummy_bpf_prog = { ++ .prog = { ++ .bpf_func = __bpf_prog_ret1, ++ }, ++}; ++ ++/* to avoid allocating empty bpf_prog_array for cgroups that ++ * don't have bpf program attached use one global 'empty_prog_array' ++ * It will not be modified the caller of bpf_prog_array_alloc() ++ * (since caller requested prog_cnt == 0) ++ * that pointer should be 'freed' by bpf_prog_array_free() ++ */ ++static struct { ++ struct bpf_prog_array hdr; ++ struct bpf_prog *null_prog; ++} empty_prog_array = { ++ .null_prog = NULL, ++}; ++ ++struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) ++{ ++ if (prog_cnt) ++ return kzalloc(sizeof(struct bpf_prog_array) + ++ sizeof(struct bpf_prog_array_item) * ++ (prog_cnt + 1), ++ flags); ++ ++ return &empty_prog_array.hdr; ++} ++ ++void bpf_prog_array_free(struct bpf_prog_array *progs) ++{ ++ if (!progs || progs == &empty_prog_array.hdr) ++ return; ++ kfree_rcu(progs, rcu); ++} ++ ++int bpf_prog_array_length(struct bpf_prog_array *array) ++{ ++ struct bpf_prog_array_item *item; ++ u32 cnt = 0; ++ ++ for (item = array->items; item->prog; item++) ++ if (item->prog != &dummy_bpf_prog.prog) ++ cnt++; ++ return cnt; ++} ++ ++bool bpf_prog_array_is_empty(struct bpf_prog_array *array) ++{ ++ struct bpf_prog_array_item *item; ++ ++ for (item = array->items; item->prog; item++) ++ if (item->prog != &dummy_bpf_prog.prog) ++ return false; ++ return true; ++} ++ ++static bool bpf_prog_array_copy_core(struct bpf_prog_array *array, ++ u32 *prog_ids, ++ u32 request_cnt) ++{ ++ struct bpf_prog_array_item *item; ++ int i = 0; ++ ++ for (item = array->items; item->prog; item++) { ++ if (item->prog == &dummy_bpf_prog.prog) ++ continue; ++ prog_ids[i] = item->prog->aux->id; ++ if (++i == request_cnt) { ++ item++; ++ break; ++ } ++ } ++ ++ return !!(item->prog); ++} ++ ++int bpf_prog_array_copy_to_user(struct bpf_prog_array *array, ++ __u32 __user *prog_ids, u32 cnt) ++{ ++ unsigned long err = 0; ++ bool nospc; ++ u32 *ids; ++ ++ /* users of this function are doing: ++ * cnt = bpf_prog_array_length(); ++ * if (cnt > 0) ++ * bpf_prog_array_copy_to_user(..., cnt); ++ * so below kcalloc doesn't need extra cnt > 0 check. 
++ */ ++ ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); ++ if (!ids) ++ return -ENOMEM; ++ nospc = bpf_prog_array_copy_core(array, ids, cnt); ++ err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); ++ kfree(ids); ++ if (err) ++ return -EFAULT; ++ if (nospc) ++ return -ENOSPC; ++ return 0; ++} ++ ++void bpf_prog_array_delete_safe(struct bpf_prog_array *array, ++ struct bpf_prog *old_prog) ++{ ++ struct bpf_prog_array_item *item; ++ ++ for (item = array->items; item->prog; item++) ++ if (item->prog == old_prog) { ++ WRITE_ONCE(item->prog, &dummy_bpf_prog.prog); ++ break; ++ } ++} ++ ++int bpf_prog_array_copy(struct bpf_prog_array *old_array, ++ struct bpf_prog *exclude_prog, ++ struct bpf_prog *include_prog, ++ struct bpf_prog_array **new_array) ++{ ++ int new_prog_cnt, carry_prog_cnt = 0; ++ struct bpf_prog_array_item *existing; ++ struct bpf_prog_array *array; ++ bool found_exclude = false; ++ int new_prog_idx = 0; ++ ++ /* Figure out how many existing progs we need to carry over to ++ * the new array. ++ */ ++ if (old_array) { ++ existing = old_array->items; ++ for (; existing->prog; existing++) { ++ if (existing->prog == exclude_prog) { ++ found_exclude = true; ++ continue; ++ } ++ if (existing->prog != &dummy_bpf_prog.prog) ++ carry_prog_cnt++; ++ if (existing->prog == include_prog) ++ return -EEXIST; ++ } ++ } ++ ++ if (exclude_prog && !found_exclude) ++ return -ENOENT; ++ ++ /* How many progs (not NULL) will be in the new array? */ ++ new_prog_cnt = carry_prog_cnt; ++ if (include_prog) ++ new_prog_cnt += 1; ++ ++ /* Do we have any prog (not NULL) in the new array? */ ++ if (!new_prog_cnt) { ++ *new_array = NULL; ++ return 0; ++ } ++ ++ /* +1 as the end of prog_array is marked with NULL */ ++ array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL); ++ if (!array) ++ return -ENOMEM; ++ ++ /* Fill in the new prog array */ ++ if (carry_prog_cnt) { ++ existing = old_array->items; ++ for (; existing->prog; existing++) ++ if (existing->prog != exclude_prog && ++ existing->prog != &dummy_bpf_prog.prog) { ++ array->items[new_prog_idx++].prog = ++ existing->prog; ++ } ++ } ++ if (include_prog) ++ array->items[new_prog_idx++].prog = include_prog; ++ array->items[new_prog_idx].prog = NULL; ++ *new_array = array; ++ return 0; ++} ++ ++int bpf_prog_array_copy_info(struct bpf_prog_array *array, ++ u32 *prog_ids, u32 request_cnt, ++ u32 *prog_cnt) ++{ ++ u32 cnt = 0; ++ ++ if (array) ++ cnt = bpf_prog_array_length(array); ++ ++ *prog_cnt = cnt; ++ ++ /* return early if user requested only program count or nothing to copy */ ++ if (!request_cnt || !cnt) ++ return 0; ++ ++ /* this function is called under trace/bpf_trace.c: bpf_event_mutex */ ++ return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? 
-ENOSPC ++ : 0; ++} ++ + static void bpf_prog_free_deferred(struct work_struct *work) + { + struct bpf_prog_aux *aux; ++ int i; + + aux = container_of(work, struct bpf_prog_aux, work); +- bpf_jit_free(aux->prog); ++#ifdef CONFIG_PERF_EVENTS ++ if (aux->prog->has_callchain_buf) ++ put_callchain_buffers(); ++#endif ++ for (i = 0; i < aux->func_cnt; i++) ++ bpf_jit_free(aux->func[i]); ++ if (aux->func_cnt) { ++ kfree(aux->func); ++ bpf_prog_unlock_free(aux->prog); ++ } else { ++ bpf_jit_free(aux->prog); ++ } + } + + /* Free internal BPF program */ +@@ -740,7 +2026,7 @@ void bpf_user_rnd_init_once(void) + prandom_init_once(&bpf_user_rnd_state); + } + +-u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_0(bpf_user_rnd_u32) + { + /* Should someone ever have the rather unwise idea to use some + * of the registers passed into this function, then note that +@@ -753,7 +2039,7 @@ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 + + state = &get_cpu_var(bpf_user_rnd_state); + res = prandom_u32_state(state); +- put_cpu_var(state); ++ put_cpu_var(bpf_user_rnd_state); + + return res; + } +@@ -762,18 +2048,36 @@ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 + const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; + const struct bpf_func_proto bpf_map_update_elem_proto __weak; + const struct bpf_func_proto bpf_map_delete_elem_proto __weak; ++const struct bpf_func_proto bpf_map_push_elem_proto __weak; ++const struct bpf_func_proto bpf_map_pop_elem_proto __weak; ++const struct bpf_func_proto bpf_map_peek_elem_proto __weak; ++const struct bpf_func_proto bpf_spin_lock_proto __weak; ++const struct bpf_func_proto bpf_spin_unlock_proto __weak; + + const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; + const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; ++const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; + const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; ++ + const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; + const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; + const struct bpf_func_proto bpf_get_current_comm_proto __weak; ++const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; ++const struct bpf_func_proto bpf_get_local_storage_proto __weak; ++ + const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) + { + return NULL; + } + ++u64 __weak ++bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, ++ void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) ++{ ++ return -ENOTSUPP; ++} ++EXPORT_SYMBOL_GPL(bpf_event_output); ++ + /* Always built-in helper functions. */ + const struct bpf_func_proto bpf_tail_call_proto = { + .func = NULL, +@@ -784,9 +2088,34 @@ const struct bpf_func_proto bpf_tail_cal + .arg3_type = ARG_ANYTHING, + }; + +-/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */ +-void __weak bpf_int_jit_compile(struct bpf_prog *prog) ++/* Stub for JITs that only support cBPF. eBPF programs are interpreted. ++ * It is encouraged to implement bpf_int_jit_compile() instead, so that ++ * eBPF and implicitly also cBPF can get JITed! ++ */ ++struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog) + { ++ return prog; ++} ++ ++/* Stub for JITs that support eBPF. All cBPF code gets transformed into ++ * eBPF by the kernel and is later compiled by bpf_int_jit_compile(). 
++ */ ++void __weak bpf_jit_compile(struct bpf_prog *prog) ++{ ++} ++ ++bool __weak bpf_helper_changes_pkt_data(void *func) ++{ ++ return false; ++} ++ ++/* Return TRUE if the JIT backend wants verifier to enable sub-register usage ++ * analysis code and wants explicit zero extension inserted by verifier. ++ * Otherwise, return FALSE. ++ */ ++bool __weak bpf_jit_needs_zext(void) ++{ ++ return false; + } + + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call +@@ -797,3 +2126,13 @@ int __weak skb_copy_bits(const struct sk + { + return -EFAULT; + } ++ ++DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); ++EXPORT_SYMBOL(bpf_stats_enabled_key); ++ ++/* All definitions of tracepoints related to BPF. */ ++#define CREATE_TRACE_POINTS ++#include ++ ++EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); ++EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx); +--- /dev/null ++++ b/kernel/bpf/devmap.c +@@ -0,0 +1,698 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io ++ */ ++ ++/* Devmaps primary use is as a backend map for XDP BPF helper call ++ * bpf_redirect_map(). Because XDP is mostly concerned with performance we ++ * spent some effort to ensure the datapath with redirect maps does not use ++ * any locking. This is a quick note on the details. ++ * ++ * We have three possible paths to get into the devmap control plane bpf ++ * syscalls, bpf programs, and driver side xmit/flush operations. A bpf syscall ++ * will invoke an update, delete, or lookup operation. To ensure updates and ++ * deletes appear atomic from the datapath side xchg() is used to modify the ++ * netdev_map array. Then because the datapath does a lookup into the netdev_map ++ * array (read-only) from an RCU critical section we use call_rcu() to wait for ++ * an rcu grace period before free'ing the old data structures. This ensures the ++ * datapath always has a valid copy. However, the datapath does a "flush" ++ * operation that pushes any pending packets in the driver outside the RCU ++ * critical section. Each bpf_dtab_netdev tracks these pending operations using ++ * a per-cpu flush list. The bpf_dtab_netdev object will not be destroyed until ++ * this list is empty, indicating outstanding flush operations have completed. ++ * ++ * BPF syscalls may race with BPF program calls on any of the update, delete ++ * or lookup operations. As noted above the xchg() operation also keep the ++ * netdev_map consistent in this case. From the devmap side BPF programs ++ * calling into these operations are the same as multiple user space threads ++ * making system calls. ++ * ++ * Finally, any of the above may race with a netdev_unregister notifier. The ++ * unregister notifier must search for net devices in the map structure that ++ * contain a reference to the net device and remove them. This is a two step ++ * process (a) dereference the bpf_dtab_netdev object in netdev_map and (b) ++ * check to see if the ifindex is the same as the net_device being removed. ++ * When removing the dev a cmpxchg() is used to ensure the correct dev is ++ * removed, in the case of a concurrent update or delete operation it is ++ * possible that the initially referenced dev is no longer in the map. As the ++ * notifier hook walks the map we know that new dev references can not be ++ * added by the user because core infrastructure ensures dev_get_by_index() ++ * calls will fail at this point. ++ * ++ * The devmap_hash type is a map type which interprets keys as ifindexes and ++ * indexes these using a hashmap. 
This allows maps that use ifindex as key to be ++ * densely packed instead of having holes in the lookup array for unused ++ * ifindexes. The setup and packet enqueue/send code is shared between the two ++ * types of devmap; only the lookup and insertion is different. ++ */ ++#include ++#include ++#include ++#include ++ ++#define DEV_CREATE_FLAG_MASK \ ++ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) ++ ++#define DEV_MAP_BULK_SIZE 16 ++struct bpf_dtab_netdev; ++ ++struct xdp_bulk_queue { ++ struct xdp_frame *q[DEV_MAP_BULK_SIZE]; ++ struct list_head flush_node; ++ struct net_device *dev_rx; ++ struct bpf_dtab_netdev *obj; ++ unsigned int count; ++}; ++ ++struct bpf_dtab_netdev { ++ struct net_device *dev; /* must be first member, due to tracepoint */ ++ struct hlist_node index_hlist; ++ struct bpf_dtab *dtab; ++ struct xdp_bulk_queue __percpu *bulkq; ++ struct rcu_head rcu; ++ unsigned int idx; /* keep track of map index for tracepoint */ ++}; ++ ++struct bpf_dtab { ++ struct bpf_map map; ++ struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */ ++ struct list_head __percpu *flush_list; ++ struct list_head list; ++ ++ /* these are only used for DEVMAP_HASH type maps */ ++ struct hlist_head *dev_index_head; ++ spinlock_t index_lock; ++ unsigned int items; ++ u32 n_buckets; ++}; ++ ++static DEFINE_SPINLOCK(dev_map_lock); ++static LIST_HEAD(dev_map_list); ++ ++static struct hlist_head *dev_map_create_hash(unsigned int entries, ++ int numa_node) ++{ ++ int i; ++ struct hlist_head *hash; ++ ++ hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node); ++ if (hash != NULL) ++ for (i = 0; i < entries; i++) ++ INIT_HLIST_HEAD(&hash[i]); ++ ++ return hash; ++} ++ ++static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, ++ int idx) ++{ ++ return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)]; ++} ++ ++static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) ++{ ++ int err, cpu; ++ u64 cost; ++ ++ /* check sanity of attributes */ ++ if (attr->max_entries == 0 || attr->key_size != 4 || ++ attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK) ++ return -EINVAL; ++ ++ /* Lookup returns a pointer straight to dev->ifindex, so make sure the ++ * verifier prevents writes from the BPF side ++ */ ++ attr->map_flags |= BPF_F_RDONLY_PROG; ++ ++ ++ bpf_map_init_from_attr(&dtab->map, attr); ++ ++ /* make sure page count doesn't overflow */ ++ cost = (u64) sizeof(struct list_head) * num_possible_cpus(); ++ ++ if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { ++ dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); ++ ++ if (!dtab->n_buckets) /* Overflow check */ ++ return -EINVAL; ++ cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets; ++ } else { ++ cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); ++ } ++ ++ /* if map size is larger than memlock limit, reject it */ ++ err = bpf_map_charge_init(&dtab->map.memory, cost); ++ if (err) ++ return -EINVAL; ++ ++ dtab->flush_list = alloc_percpu(struct list_head); ++ if (!dtab->flush_list) ++ goto free_charge; ++ ++ for_each_possible_cpu(cpu) ++ INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu)); ++ ++ if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { ++ dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, ++ dtab->map.numa_node); ++ if (!dtab->dev_index_head) ++ goto free_percpu; ++ ++ spin_lock_init(&dtab->index_lock); ++ } else { ++ dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * ++ sizeof(struct bpf_dtab_netdev *), ++ dtab->map.numa_node); ++ if 
(!dtab->netdev_map) ++ goto free_percpu; ++ } ++ ++ return 0; ++ ++free_percpu: ++ free_percpu(dtab->flush_list); ++free_charge: ++ bpf_map_charge_finish(&dtab->map.memory); ++ return -ENOMEM; ++} ++ ++static struct bpf_map *dev_map_alloc(union bpf_attr *attr) ++{ ++ struct bpf_dtab *dtab; ++ int err; ++ ++ if (!capable(CAP_NET_ADMIN)) ++ return ERR_PTR(-EPERM); ++ ++ dtab = kzalloc(sizeof(*dtab), GFP_USER); ++ if (!dtab) ++ return ERR_PTR(-ENOMEM); ++ ++ err = dev_map_init_map(dtab, attr); ++ if (err) { ++ kfree(dtab); ++ return ERR_PTR(err); ++ } ++ ++ spin_lock(&dev_map_lock); ++ list_add_tail_rcu(&dtab->list, &dev_map_list); ++ spin_unlock(&dev_map_lock); ++ ++ return &dtab->map; ++} ++ ++static void dev_map_free(struct bpf_map *map) ++{ ++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); ++ int i, cpu; ++ ++ /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, ++ * so the programs (can be more than one that used this map) were ++ * disconnected from events. Wait for outstanding critical sections in ++ * these programs to complete. The rcu critical section only guarantees ++ * no further reads against netdev_map. It does __not__ ensure pending ++ * flush operations (if any) are complete. ++ */ ++ ++ spin_lock(&dev_map_lock); ++ list_del_rcu(&dtab->list); ++ spin_unlock(&dev_map_lock); ++ ++ bpf_clear_redirect_map(map); ++ synchronize_rcu(); ++ ++ /* Make sure prior __dev_map_entry_free() have completed. */ ++ rcu_barrier(); ++ ++ /* To ensure all pending flush operations have completed wait for flush ++ * list to empty on _all_ cpus. ++ * Because the above synchronize_rcu() ensures the map is disconnected ++ * from the program we can assume no new items will be added. ++ */ ++ for_each_online_cpu(cpu) { ++ struct list_head *flush_list = per_cpu_ptr(dtab->flush_list, cpu); ++ ++ while (!list_empty(flush_list)) ++ cond_resched(); ++ } ++ ++ if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) { ++ for (i = 0; i < dtab->n_buckets; i++) { ++ struct bpf_dtab_netdev *dev; ++ struct hlist_head *head; ++ struct hlist_node *next; ++ ++ head = dev_map_index_hash(dtab, i); ++ ++ hlist_for_each_entry_safe(dev, next, head, index_hlist) { ++ hlist_del_rcu(&dev->index_hlist); ++ free_percpu(dev->bulkq); ++ dev_put(dev->dev); ++ kfree(dev); ++ } ++ } ++ ++ bpf_map_area_free(dtab->dev_index_head); ++ } else { ++ for (i = 0; i < dtab->map.max_entries; i++) { ++ struct bpf_dtab_netdev *dev; ++ ++ dev = dtab->netdev_map[i]; ++ if (!dev) ++ continue; ++ ++ free_percpu(dev->bulkq); ++ dev_put(dev->dev); ++ kfree(dev); ++ } ++ ++ bpf_map_area_free(dtab->netdev_map); ++ } ++ ++ free_percpu(dtab->flush_list); ++ kfree(dtab); ++} ++ ++static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) ++{ ++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); ++ u32 index = key ? 
*(u32 *)key : U32_MAX; ++ u32 *next = next_key; ++ ++ if (index >= dtab->map.max_entries) { ++ *next = 0; ++ return 0; ++ } ++ ++ if (index == dtab->map.max_entries - 1) ++ return -ENOENT; ++ *next = index + 1; ++ return 0; ++} ++ ++struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) ++{ ++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); ++ struct hlist_head *head = dev_map_index_hash(dtab, key); ++ struct bpf_dtab_netdev *dev; ++ ++ hlist_for_each_entry_rcu(dev, head, index_hlist) ++ if (dev->idx == key) ++ return dev; ++ ++ return NULL; ++} ++ ++static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, ++ void *next_key) ++{ ++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); ++ u32 idx, *next = next_key; ++ struct bpf_dtab_netdev *dev, *next_dev; ++ struct hlist_head *head; ++ int i = 0; ++ ++ if (!key) ++ goto find_first; ++ ++ idx = *(u32 *)key; ++ ++ dev = __dev_map_hash_lookup_elem(map, idx); ++ if (!dev) ++ goto find_first; ++ ++ next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)), ++ struct bpf_dtab_netdev, index_hlist); ++ ++ if (next_dev) { ++ *next = next_dev->idx; ++ return 0; ++ } ++ ++ i = idx & (dtab->n_buckets - 1); ++ i++; ++ ++ find_first: ++ for (; i < dtab->n_buckets; i++) { ++ head = dev_map_index_hash(dtab, i); ++ ++ next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), ++ struct bpf_dtab_netdev, ++ index_hlist); ++ if (next_dev) { ++ *next = next_dev->idx; ++ return 0; ++ } ++ } ++ ++ return -ENOENT; ++} ++ ++/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled ++ * from the driver before returning from its napi->poll() routine. The poll() ++ * routine is called either from busy_poll context or net_rx_action signaled ++ * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the ++ * net device can be torn down. On devmap tear down we ensure the flush list ++ * is empty before completing to ensure all flush operations have completed. ++ */ ++void __dev_map_flush(struct bpf_map *map) ++{ ++} ++ ++/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or ++ * update happens in parallel here a dev_put wont happen until after reading the ++ * ifindex. ++ */ ++struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key) ++{ ++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); ++ struct bpf_dtab_netdev *obj; ++ ++ if (key >= map->max_entries) ++ return NULL; ++ ++ obj = READ_ONCE(dtab->netdev_map[key]); ++ return obj; ++} ++ ++int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, ++ struct net_device *dev_rx) ++{ ++ return -EOPNOTSUPP; ++} ++ ++int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, ++ struct bpf_prog *xdp_prog) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static void *dev_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key); ++ struct net_device *dev = obj ? obj->dev : NULL; ++ ++ return dev ? &dev->ifindex : NULL; ++} ++ ++static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key) ++{ ++ struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map, ++ *(u32 *)key); ++ struct net_device *dev = obj ? obj->dev : NULL; ++ ++ return dev ? 
&dev->ifindex : NULL; ++} ++ ++static void __dev_map_entry_free(struct rcu_head *rcu) ++{ ++ struct bpf_dtab_netdev *dev; ++ ++ dev = container_of(rcu, struct bpf_dtab_netdev, rcu); ++ free_percpu(dev->bulkq); ++ dev_put(dev->dev); ++ kfree(dev); ++} ++ ++static int dev_map_delete_elem(struct bpf_map *map, void *key) ++{ ++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); ++ struct bpf_dtab_netdev *old_dev; ++ int k = *(u32 *)key; ++ ++ if (k >= map->max_entries) ++ return -EINVAL; ++ ++ /* Use call_rcu() here to ensure any rcu critical sections have ++ * completed, but this does not guarantee a flush has happened ++ * yet. Because driver side rcu_read_lock/unlock only protects the ++ * running XDP program. However, for pending flush operations the ++ * dev and ctx are stored in another per cpu map. And additionally, ++ * the driver tear down ensures all soft irqs are complete before ++ * removing the net device in the case of dev_put equals zero. ++ */ ++ old_dev = xchg(&dtab->netdev_map[k], NULL); ++ if (old_dev) ++ call_rcu(&old_dev->rcu, __dev_map_entry_free); ++ return 0; ++} ++ ++static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) ++{ ++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); ++ struct bpf_dtab_netdev *old_dev; ++ int k = *(u32 *)key; ++ unsigned long flags; ++ int ret = -ENOENT; ++ ++ spin_lock_irqsave(&dtab->index_lock, flags); ++ ++ old_dev = __dev_map_hash_lookup_elem(map, k); ++ if (old_dev) { ++ dtab->items--; ++ hlist_del_init_rcu(&old_dev->index_hlist); ++ call_rcu(&old_dev->rcu, __dev_map_entry_free); ++ ret = 0; ++ } ++ spin_unlock_irqrestore(&dtab->index_lock, flags); ++ ++ return ret; ++} ++ ++static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, ++ struct bpf_dtab *dtab, ++ u32 ifindex, ++ unsigned int idx) ++{ ++ gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; ++ struct bpf_dtab_netdev *dev; ++ struct xdp_bulk_queue *bq; ++ int cpu; ++ ++ dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node); ++ if (!dev) ++ return ERR_PTR(-ENOMEM); ++ ++ dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq), ++ sizeof(void *), gfp); ++ if (!dev->bulkq) { ++ kfree(dev); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ for_each_possible_cpu(cpu) { ++ bq = per_cpu_ptr(dev->bulkq, cpu); ++ bq->obj = dev; ++ } ++ ++ dev->dev = dev_get_by_index(net, ifindex); ++ if (!dev->dev) { ++ free_percpu(dev->bulkq); ++ kfree(dev); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ dev->idx = idx; ++ dev->dtab = dtab; ++ ++ return dev; ++} ++ ++static int __dev_map_update_elem(struct net *net, struct bpf_map *map, ++ void *key, void *value, u64 map_flags) ++{ ++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); ++ struct bpf_dtab_netdev *dev, *old_dev; ++ u32 ifindex = *(u32 *)value; ++ u32 i = *(u32 *)key; ++ ++ if (unlikely(map_flags > BPF_EXIST)) ++ return -EINVAL; ++ if (unlikely(i >= dtab->map.max_entries)) ++ return -E2BIG; ++ if (unlikely(map_flags == BPF_NOEXIST)) ++ return -EEXIST; ++ ++ if (!ifindex) { ++ dev = NULL; ++ } else { ++ dev = __dev_map_alloc_node(net, dtab, ifindex, i); ++ if (IS_ERR(dev)) ++ return PTR_ERR(dev); ++ } ++ ++ /* Use call_rcu() here to ensure rcu critical sections have completed ++ * Remembering the driver side flush operation will happen before the ++ * net device is removed. 
++ */ ++ old_dev = xchg(&dtab->netdev_map[i], dev); ++ if (old_dev) ++ call_rcu(&old_dev->rcu, __dev_map_entry_free); ++ ++ return 0; ++} ++ ++static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, ++ u64 map_flags) ++{ ++ return __dev_map_update_elem(current->nsproxy->net_ns, ++ map, key, value, map_flags); ++} ++ ++static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, ++ void *key, void *value, u64 map_flags) ++{ ++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); ++ struct bpf_dtab_netdev *dev, *old_dev; ++ u32 ifindex = *(u32 *)value; ++ u32 idx = *(u32 *)key; ++ unsigned long flags; ++ int err = -EEXIST; ++ ++ if (unlikely(map_flags > BPF_EXIST || !ifindex)) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&dtab->index_lock, flags); ++ ++ old_dev = __dev_map_hash_lookup_elem(map, idx); ++ if (old_dev && (map_flags & BPF_NOEXIST)) ++ goto out_err; ++ ++ dev = __dev_map_alloc_node(net, dtab, ifindex, idx); ++ if (IS_ERR(dev)) { ++ err = PTR_ERR(dev); ++ goto out_err; ++ } ++ ++ if (old_dev) { ++ hlist_del_rcu(&old_dev->index_hlist); ++ } else { ++ if (dtab->items >= dtab->map.max_entries) { ++ spin_unlock_irqrestore(&dtab->index_lock, flags); ++ call_rcu(&dev->rcu, __dev_map_entry_free); ++ return -E2BIG; ++ } ++ dtab->items++; ++ } ++ ++ hlist_add_head_rcu(&dev->index_hlist, ++ dev_map_index_hash(dtab, idx)); ++ spin_unlock_irqrestore(&dtab->index_lock, flags); ++ ++ if (old_dev) ++ call_rcu(&old_dev->rcu, __dev_map_entry_free); ++ ++ return 0; ++ ++out_err: ++ spin_unlock_irqrestore(&dtab->index_lock, flags); ++ return err; ++} ++ ++static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, ++ u64 map_flags) ++{ ++ return __dev_map_hash_update_elem(current->nsproxy->net_ns, ++ map, key, value, map_flags); ++} ++ ++const struct bpf_map_ops dev_map_ops = { ++ .map_alloc = dev_map_alloc, ++ .map_free = dev_map_free, ++ .map_get_next_key = dev_map_get_next_key, ++ .map_lookup_elem = dev_map_lookup_elem, ++ .map_update_elem = dev_map_update_elem, ++ .map_delete_elem = dev_map_delete_elem, ++ .map_check_btf = map_check_no_btf, ++}; ++ ++const struct bpf_map_ops dev_map_hash_ops = { ++ .map_alloc = dev_map_alloc, ++ .map_free = dev_map_free, ++ .map_get_next_key = dev_map_hash_get_next_key, ++ .map_lookup_elem = dev_map_hash_lookup_elem, ++ .map_update_elem = dev_map_hash_update_elem, ++ .map_delete_elem = dev_map_hash_delete_elem, ++ .map_check_btf = map_check_no_btf, ++}; ++ ++static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab, ++ struct net_device *netdev) ++{ ++ unsigned long flags; ++ u32 i; ++ ++ spin_lock_irqsave(&dtab->index_lock, flags); ++ for (i = 0; i < dtab->n_buckets; i++) { ++ struct bpf_dtab_netdev *dev; ++ struct hlist_head *head; ++ struct hlist_node *next; ++ ++ head = dev_map_index_hash(dtab, i); ++ ++ hlist_for_each_entry_safe(dev, next, head, index_hlist) { ++ if (netdev != dev->dev) ++ continue; ++ ++ dtab->items--; ++ hlist_del_rcu(&dev->index_hlist); ++ call_rcu(&dev->rcu, __dev_map_entry_free); ++ } ++ } ++ spin_unlock_irqrestore(&dtab->index_lock, flags); ++} ++ ++static int dev_map_notification(struct notifier_block *notifier, ++ ulong event, void *ptr) ++{ ++ struct net_device *netdev = netdev_notifier_info_to_dev(ptr); ++ struct bpf_dtab *dtab; ++ int i; ++ ++ switch (event) { ++ case NETDEV_UNREGISTER: ++ /* This rcu_read_lock/unlock pair is needed because ++ * dev_map_list is an RCU list AND to ensure a delete ++ * operation does not free a netdev_map entry while we ++ * are 
comparing it against the netdev being unregistered. ++ */ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(dtab, &dev_map_list, list) { ++ if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) { ++ dev_map_hash_remove_netdev(dtab, netdev); ++ continue; ++ } ++ ++ for (i = 0; i < dtab->map.max_entries; i++) { ++ struct bpf_dtab_netdev *dev, *odev; ++ ++ dev = READ_ONCE(dtab->netdev_map[i]); ++ if (!dev || netdev != dev->dev) ++ continue; ++ odev = cmpxchg(&dtab->netdev_map[i], dev, NULL); ++ if (dev == odev) ++ call_rcu(&dev->rcu, ++ __dev_map_entry_free); ++ } ++ } ++ rcu_read_unlock(); ++ break; ++ default: ++ break; ++ } ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block dev_map_notifier = { ++ .notifier_call = dev_map_notification, ++}; ++ ++static int __init dev_map_init(void) ++{ ++ /* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */ ++ BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) != ++ offsetof(struct _bpf_dtab_netdev, dev)); ++ register_netdevice_notifier(&dev_map_notifier); ++ return 0; ++} ++ ++subsys_initcall(dev_map_init); +--- /dev/null ++++ b/kernel/bpf/disasm.c +@@ -0,0 +1,258 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++ * Copyright (c) 2016 Facebook ++ */ ++ ++#include ++ ++#include "disasm.h" ++ ++#define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x) ++static const char * const func_id_str[] = { ++ __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN) ++}; ++#undef __BPF_FUNC_STR_FN ++ ++static const char *__func_get_name(const struct bpf_insn_cbs *cbs, ++ const struct bpf_insn *insn, ++ char *buff, size_t len) ++{ ++ BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); ++ ++ if (insn->src_reg != BPF_PSEUDO_CALL && ++ insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID && ++ func_id_str[insn->imm]) ++ return func_id_str[insn->imm]; ++ ++ if (cbs && cbs->cb_call) ++ return cbs->cb_call(cbs->private_data, insn); ++ ++ if (insn->src_reg == BPF_PSEUDO_CALL) ++ snprintf(buff, len, "%+d", insn->imm); ++ ++ return buff; ++} ++ ++static const char *__func_imm_name(const struct bpf_insn_cbs *cbs, ++ const struct bpf_insn *insn, ++ u64 full_imm, char *buff, size_t len) ++{ ++ if (cbs && cbs->cb_imm) ++ return cbs->cb_imm(cbs->private_data, insn, full_imm); ++ ++ snprintf(buff, len, "0x%llx", (unsigned long long)full_imm); ++ return buff; ++} ++ ++const char *func_id_name(int id) ++{ ++ if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id]) ++ return func_id_str[id]; ++ else ++ return "unknown"; ++} ++ ++const char *const bpf_class_string[8] = { ++ [BPF_LD] = "ld", ++ [BPF_LDX] = "ldx", ++ [BPF_ST] = "st", ++ [BPF_STX] = "stx", ++ [BPF_ALU] = "alu", ++ [BPF_JMP] = "jmp", ++ [BPF_JMP32] = "jmp32", ++ [BPF_ALU64] = "alu64", ++}; ++ ++const char *const bpf_alu_string[16] = { ++ [BPF_ADD >> 4] = "+=", ++ [BPF_SUB >> 4] = "-=", ++ [BPF_MUL >> 4] = "*=", ++ [BPF_DIV >> 4] = "/=", ++ [BPF_OR >> 4] = "|=", ++ [BPF_AND >> 4] = "&=", ++ [BPF_LSH >> 4] = "<<=", ++ [BPF_RSH >> 4] = ">>=", ++ [BPF_NEG >> 4] = "neg", ++ [BPF_MOD >> 4] = "%=", ++ [BPF_XOR >> 4] = "^=", ++ [BPF_MOV >> 4] = "=", ++ [BPF_ARSH >> 4] = "s>>=", ++ [BPF_END >> 4] = "endian", ++}; ++ ++static const char *const bpf_ldst_string[] = { ++ [BPF_W >> 3] = "u32", ++ [BPF_H >> 3] = "u16", ++ [BPF_B >> 3] = "u8", ++ [BPF_DW >> 3] = "u64", ++}; ++ ++static const char *const bpf_jmp_string[16] = { ++ [BPF_JA >> 4] = "jmp", ++ [BPF_JEQ >> 4] = "==", ++ [BPF_JGT >> 4] = ">", ++ [BPF_JLT >> 4] = "<", ++ [BPF_JGE >> 4] = ">=", ++ [BPF_JLE >> 4] = 
"<=", ++ [BPF_JSET >> 4] = "&", ++ [BPF_JNE >> 4] = "!=", ++ [BPF_JSGT >> 4] = "s>", ++ [BPF_JSLT >> 4] = "s<", ++ [BPF_JSGE >> 4] = "s>=", ++ [BPF_JSLE >> 4] = "s<=", ++ [BPF_CALL >> 4] = "call", ++ [BPF_EXIT >> 4] = "exit", ++}; ++ ++static void print_bpf_end_insn(bpf_insn_print_t verbose, ++ void *private_data, ++ const struct bpf_insn *insn) ++{ ++ verbose(private_data, "(%02x) r%d = %s%d r%d\n", ++ insn->code, insn->dst_reg, ++ BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le", ++ insn->imm, insn->dst_reg); ++} ++ ++void print_bpf_insn(const struct bpf_insn_cbs *cbs, ++ const struct bpf_insn *insn, ++ bool allow_ptr_leaks) ++{ ++ const bpf_insn_print_t verbose = cbs->cb_print; ++ u8 class = BPF_CLASS(insn->code); ++ ++ if (class == BPF_ALU || class == BPF_ALU64) { ++ if (BPF_OP(insn->code) == BPF_END) { ++ if (class == BPF_ALU64) ++ verbose(cbs->private_data, "BUG_alu64_%02x\n", insn->code); ++ else ++ print_bpf_end_insn(verbose, cbs->private_data, insn); ++ } else if (BPF_OP(insn->code) == BPF_NEG) { ++ verbose(cbs->private_data, "(%02x) %c%d = -%c%d\n", ++ insn->code, class == BPF_ALU ? 'w' : 'r', ++ insn->dst_reg, class == BPF_ALU ? 'w' : 'r', ++ insn->dst_reg); ++ } else if (BPF_SRC(insn->code) == BPF_X) { ++ verbose(cbs->private_data, "(%02x) %c%d %s %c%d\n", ++ insn->code, class == BPF_ALU ? 'w' : 'r', ++ insn->dst_reg, ++ bpf_alu_string[BPF_OP(insn->code) >> 4], ++ class == BPF_ALU ? 'w' : 'r', ++ insn->src_reg); ++ } else { ++ verbose(cbs->private_data, "(%02x) %c%d %s %d\n", ++ insn->code, class == BPF_ALU ? 'w' : 'r', ++ insn->dst_reg, ++ bpf_alu_string[BPF_OP(insn->code) >> 4], ++ insn->imm); ++ } ++ } else if (class == BPF_STX) { ++ if (BPF_MODE(insn->code) == BPF_MEM) ++ verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = r%d\n", ++ insn->code, ++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++ insn->dst_reg, ++ insn->off, insn->src_reg); ++ else if (BPF_MODE(insn->code) == BPF_XADD) ++ verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n", ++ insn->code, ++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++ insn->dst_reg, insn->off, ++ insn->src_reg); ++ else ++ verbose(cbs->private_data, "BUG_%02x\n", insn->code); ++ } else if (class == BPF_ST) { ++ if (BPF_MODE(insn->code) != BPF_MEM) { ++ verbose(cbs->private_data, "BUG_st_%02x\n", insn->code); ++ return; ++ } ++ verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", ++ insn->code, ++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++ insn->dst_reg, ++ insn->off, insn->imm); ++ } else if (class == BPF_LDX) { ++ if (BPF_MODE(insn->code) != BPF_MEM) { ++ verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code); ++ return; ++ } ++ verbose(cbs->private_data, "(%02x) r%d = *(%s *)(r%d %+d)\n", ++ insn->code, insn->dst_reg, ++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++ insn->src_reg, insn->off); ++ } else if (class == BPF_LD) { ++ if (BPF_MODE(insn->code) == BPF_ABS) { ++ verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[%d]\n", ++ insn->code, ++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++ insn->imm); ++ } else if (BPF_MODE(insn->code) == BPF_IND) { ++ verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[r%d + %d]\n", ++ insn->code, ++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++ insn->src_reg, insn->imm); ++ } else if (BPF_MODE(insn->code) == BPF_IMM && ++ BPF_SIZE(insn->code) == BPF_DW) { ++ /* At this point, we already made sure that the second ++ * part of the ldimm64 insn is accessible. 
++ */ ++ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; ++ bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD || ++ insn->src_reg == BPF_PSEUDO_MAP_VALUE; ++ char tmp[64]; ++ ++ if (is_ptr && !allow_ptr_leaks) ++ imm = 0; ++ ++ verbose(cbs->private_data, "(%02x) r%d = %s\n", ++ insn->code, insn->dst_reg, ++ __func_imm_name(cbs, insn, imm, ++ tmp, sizeof(tmp))); ++ } else { ++ verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code); ++ return; ++ } ++ } else if (class == BPF_JMP32 || class == BPF_JMP) { ++ u8 opcode = BPF_OP(insn->code); ++ ++ if (opcode == BPF_CALL) { ++ char tmp[64]; ++ ++ if (insn->src_reg == BPF_PSEUDO_CALL) { ++ verbose(cbs->private_data, "(%02x) call pc%s\n", ++ insn->code, ++ __func_get_name(cbs, insn, ++ tmp, sizeof(tmp))); ++ } else { ++ strcpy(tmp, "unknown"); ++ verbose(cbs->private_data, "(%02x) call %s#%d\n", insn->code, ++ __func_get_name(cbs, insn, ++ tmp, sizeof(tmp)), ++ insn->imm); ++ } ++ } else if (insn->code == (BPF_JMP | BPF_JA)) { ++ verbose(cbs->private_data, "(%02x) goto pc%+d\n", ++ insn->code, insn->off); ++ } else if (insn->code == (BPF_JMP | BPF_EXIT)) { ++ verbose(cbs->private_data, "(%02x) exit\n", insn->code); ++ } else if (BPF_SRC(insn->code) == BPF_X) { ++ verbose(cbs->private_data, ++ "(%02x) if %c%d %s %c%d goto pc%+d\n", ++ insn->code, class == BPF_JMP32 ? 'w' : 'r', ++ insn->dst_reg, ++ bpf_jmp_string[BPF_OP(insn->code) >> 4], ++ class == BPF_JMP32 ? 'w' : 'r', ++ insn->src_reg, insn->off); ++ } else { ++ verbose(cbs->private_data, ++ "(%02x) if %c%d %s 0x%x goto pc%+d\n", ++ insn->code, class == BPF_JMP32 ? 'w' : 'r', ++ insn->dst_reg, ++ bpf_jmp_string[BPF_OP(insn->code) >> 4], ++ insn->imm, insn->off); ++ } ++ } else { ++ verbose(cbs->private_data, "(%02x) %s\n", ++ insn->code, bpf_class_string[class]); ++ } ++} +--- /dev/null ++++ b/kernel/bpf/disasm.h +@@ -0,0 +1,40 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++ * Copyright (c) 2016 Facebook ++ */ ++ ++#ifndef __BPF_DISASM_H__ ++#define __BPF_DISASM_H__ ++ ++#include ++#include ++#include ++#ifndef __KERNEL__ ++#include ++#include ++#endif ++ ++extern const char *const bpf_alu_string[16]; ++extern const char *const bpf_class_string[8]; ++ ++const char *func_id_name(int id); ++ ++typedef __printf(2, 3) void (*bpf_insn_print_t)(void *private_data, ++ const char *, ...); ++typedef const char *(*bpf_insn_revmap_call_t)(void *private_data, ++ const struct bpf_insn *insn); ++typedef const char *(*bpf_insn_print_imm_t)(void *private_data, ++ const struct bpf_insn *insn, ++ __u64 full_imm); ++ ++struct bpf_insn_cbs { ++ bpf_insn_print_t cb_print; ++ bpf_insn_revmap_call_t cb_call; ++ bpf_insn_print_imm_t cb_imm; ++ void *private_data; ++}; ++ ++void print_bpf_insn(const struct bpf_insn_cbs *cbs, ++ const struct bpf_insn *insn, ++ bool allow_ptr_leaks); ++#endif +--- a/kernel/bpf/hashtab.c ++++ b/kernel/bpf/hashtab.c +@@ -1,147 +1,467 @@ ++// SPDX-License-Identifier: GPL-2.0-only + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of version 2 of the GNU General Public +- * License as published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, but +- * WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. 
++ * Copyright (c) 2016 Facebook + */ + #include ++#include + #include + #include +-#include ++#include ++#include ++#include ++#include "percpu_freelist.h" ++#include "bpf_lru_list.h" ++#include "map_in_map.h" ++ ++#define HTAB_CREATE_FLAG_MASK \ ++ (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ ++ BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED) ++ ++struct bucket { ++ struct hlist_nulls_head head; ++ raw_spinlock_t lock; ++}; + + struct bpf_htab { + struct bpf_map map; +- struct hlist_head *buckets; +- raw_spinlock_t lock; +- u32 count; /* number of elements in this hashtable */ ++ struct bucket *buckets; ++ void *elems; ++ union { ++ struct pcpu_freelist freelist; ++ struct bpf_lru lru; ++ }; ++ struct htab_elem *__percpu *extra_elems; ++ atomic_t count; /* number of elements in this hashtable */ + u32 n_buckets; /* number of hash buckets */ + u32 elem_size; /* size of each element in bytes */ ++ u32 hashrnd; + }; + + /* each htab element is struct htab_elem + key + value */ + struct htab_elem { +- struct hlist_node hash_node; +- struct rcu_head rcu; ++ union { ++ struct hlist_nulls_node hash_node; ++ struct { ++ void *padding; ++ union { ++ struct bpf_htab *htab; ++ struct pcpu_freelist_node fnode; ++ }; ++ }; ++ }; ++ union { ++ struct rcu_head rcu; ++ struct bpf_lru_node lru_node; ++ }; + u32 hash; + char key[0] __aligned(8); + }; + ++static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); ++ ++static bool htab_is_lru(const struct bpf_htab *htab) ++{ ++ return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH || ++ htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; ++} ++ ++static bool htab_is_percpu(const struct bpf_htab *htab) ++{ ++ return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH || ++ htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; ++} ++ ++static bool htab_is_prealloc(const struct bpf_htab *htab) ++{ ++ return !(htab->map.map_flags & BPF_F_NO_PREALLOC); ++} ++ ++static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, ++ void __percpu *pptr) ++{ ++ *(void __percpu **)(l->key + key_size) = pptr; ++} ++ ++static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) ++{ ++ return *(void __percpu **)(l->key + key_size); ++} ++ ++static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) ++{ ++ return *(void **)(l->key + roundup(map->key_size, 8)); ++} ++ ++static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) ++{ ++ return (struct htab_elem *) (htab->elems + i * htab->elem_size); ++} ++ ++static void htab_free_elems(struct bpf_htab *htab) ++{ ++ int i; ++ ++ if (!htab_is_percpu(htab)) ++ goto free_elems; ++ ++ for (i = 0; i < htab->map.max_entries; i++) { ++ void __percpu *pptr; ++ ++ pptr = htab_elem_get_ptr(get_htab_elem(htab, i), ++ htab->map.key_size); ++ free_percpu(pptr); ++ cond_resched(); ++ } ++free_elems: ++ bpf_map_area_free(htab->elems); ++} ++ ++static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, ++ u32 hash) ++{ ++ struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash); ++ struct htab_elem *l; ++ ++ if (node) { ++ l = container_of(node, struct htab_elem, lru_node); ++ memcpy(l->key, key, htab->map.key_size); ++ return l; ++ } ++ ++ return NULL; ++} ++ ++static int prealloc_init(struct bpf_htab *htab) ++{ ++ u32 num_entries = htab->map.max_entries; ++ int err = -ENOMEM, i; ++ ++ if (!htab_is_percpu(htab) && !htab_is_lru(htab)) ++ num_entries += num_possible_cpus(); ++ ++ htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries, ++ 
htab->map.numa_node); ++ if (!htab->elems) ++ return -ENOMEM; ++ ++ if (!htab_is_percpu(htab)) ++ goto skip_percpu_elems; ++ ++ for (i = 0; i < num_entries; i++) { ++ u32 size = round_up(htab->map.value_size, 8); ++ void __percpu *pptr; ++ ++ pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN); ++ if (!pptr) ++ goto free_elems; ++ htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, ++ pptr); ++ cond_resched(); ++ } ++ ++skip_percpu_elems: ++ if (htab_is_lru(htab)) ++ err = bpf_lru_init(&htab->lru, ++ htab->map.map_flags & BPF_F_NO_COMMON_LRU, ++ offsetof(struct htab_elem, hash) - ++ offsetof(struct htab_elem, lru_node), ++ htab_lru_map_delete_node, ++ htab); ++ else ++ err = pcpu_freelist_init(&htab->freelist); ++ ++ if (err) ++ goto free_elems; ++ ++ if (htab_is_lru(htab)) ++ bpf_lru_populate(&htab->lru, htab->elems, ++ offsetof(struct htab_elem, lru_node), ++ htab->elem_size, num_entries); ++ else ++ pcpu_freelist_populate(&htab->freelist, ++ htab->elems + offsetof(struct htab_elem, fnode), ++ htab->elem_size, num_entries); ++ ++ return 0; ++ ++free_elems: ++ htab_free_elems(htab); ++ return err; ++} ++ ++static void prealloc_destroy(struct bpf_htab *htab) ++{ ++ htab_free_elems(htab); ++ ++ if (htab_is_lru(htab)) ++ bpf_lru_destroy(&htab->lru); ++ else ++ pcpu_freelist_destroy(&htab->freelist); ++} ++ ++static int alloc_extra_elems(struct bpf_htab *htab) ++{ ++ struct htab_elem *__percpu *pptr, *l_new; ++ struct pcpu_freelist_node *l; ++ int cpu; ++ ++ pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8, ++ GFP_USER | __GFP_NOWARN); ++ if (!pptr) ++ return -ENOMEM; ++ ++ for_each_possible_cpu(cpu) { ++ l = pcpu_freelist_pop(&htab->freelist); ++ /* pop will succeed, since prealloc_init() ++ * preallocated extra num_possible_cpus elements ++ */ ++ l_new = container_of(l, struct htab_elem, fnode); ++ *per_cpu_ptr(pptr, cpu) = l_new; ++ } ++ htab->extra_elems = pptr; ++ return 0; ++} ++ + /* Called from syscall */ +-static struct bpf_map *htab_map_alloc(union bpf_attr *attr) ++static int htab_map_alloc_check(union bpf_attr *attr) + { +- struct bpf_htab *htab; +- int err, i; ++ bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || ++ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); ++ bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || ++ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); ++ /* percpu_lru means each cpu has its own LRU list. ++ * it is different from BPF_MAP_TYPE_PERCPU_HASH where ++ * the map's value itself is percpu. percpu_lru has ++ * nothing to do with the map's value. ++ */ ++ bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); ++ bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); ++ bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED); ++ int numa_node = bpf_map_attr_numa_node(attr); ++ ++ BUILD_BUG_ON(offsetof(struct htab_elem, htab) != ++ offsetof(struct htab_elem, hash_node.pprev)); ++ BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != ++ offsetof(struct htab_elem, hash_node.pprev)); ++ ++ if (lru && !capable(CAP_SYS_ADMIN)) ++ /* LRU implementation is much complicated than other ++ * maps. Hence, limit to CAP_SYS_ADMIN for now. ++ */ ++ return -EPERM; + +- htab = kzalloc(sizeof(*htab), GFP_USER); +- if (!htab) +- return ERR_PTR(-ENOMEM); ++ if (zero_seed && !capable(CAP_SYS_ADMIN)) ++ /* Guard against local DoS, and discourage production use. 
*/ ++ return -EPERM; + +- /* mandatory map attributes */ +- htab->map.key_size = attr->key_size; +- htab->map.value_size = attr->value_size; +- htab->map.max_entries = attr->max_entries; ++ if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK || ++ !bpf_map_flags_access_ok(attr->map_flags)) ++ return -EINVAL; ++ ++ if (!lru && percpu_lru) ++ return -EINVAL; ++ ++ if (lru && !prealloc) ++ return -ENOTSUPP; ++ ++ if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru)) ++ return -EINVAL; + + /* check sanity of attributes. + * value_size == 0 may be allowed in the future to use map as a set + */ +- err = -EINVAL; +- if (htab->map.max_entries == 0 || htab->map.key_size == 0 || +- htab->map.value_size == 0) +- goto free_htab; +- +- /* hash table size must be power of 2 */ +- htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); ++ if (attr->max_entries == 0 || attr->key_size == 0 || ++ attr->value_size == 0) ++ return -EINVAL; + +- err = -E2BIG; +- if (htab->map.key_size > MAX_BPF_STACK) ++ if (attr->key_size > MAX_BPF_STACK) + /* eBPF programs initialize keys on stack, so they cannot be + * larger than max stack size + */ +- goto free_htab; ++ return -E2BIG; + +- if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) - ++ if (attr->value_size >= KMALLOC_MAX_SIZE - + MAX_BPF_STACK - sizeof(struct htab_elem)) + /* if value_size is bigger, the user space won't be able to + * access the elements via bpf syscall. This check also makes + * sure that the elem_size doesn't overflow and it's + * kmalloc-able later in htab_map_update_elem() + */ +- goto free_htab; ++ return -E2BIG; ++ ++ return 0; ++} ++ ++static struct bpf_map *htab_map_alloc(union bpf_attr *attr) ++{ ++ bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || ++ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); ++ bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || ++ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); ++ /* percpu_lru means each cpu has its own LRU list. ++ * it is different from BPF_MAP_TYPE_PERCPU_HASH where ++ * the map's value itself is percpu. percpu_lru has ++ * nothing to do with the map's value. ++ */ ++ bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); ++ bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); ++ struct bpf_htab *htab; ++ int err, i; ++ u64 cost; ++ ++ htab = kzalloc(sizeof(*htab), GFP_USER); ++ if (!htab) ++ return ERR_PTR(-ENOMEM); ++ ++ bpf_map_init_from_attr(&htab->map, attr); ++ ++ if (percpu_lru) { ++ /* ensure each CPU's lru list has >=1 elements. ++ * since we are at it, make each lru list has the same ++ * number of elements. 
++ */ ++ htab->map.max_entries = roundup(attr->max_entries, ++ num_possible_cpus()); ++ if (htab->map.max_entries < attr->max_entries) ++ htab->map.max_entries = rounddown(attr->max_entries, ++ num_possible_cpus()); ++ } ++ ++ /* hash table size must be power of 2 */ ++ htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); + + htab->elem_size = sizeof(struct htab_elem) + +- round_up(htab->map.key_size, 8) + +- htab->map.value_size; ++ round_up(htab->map.key_size, 8); ++ if (percpu) ++ htab->elem_size += sizeof(void *); ++ else ++ htab->elem_size += round_up(htab->map.value_size, 8); + ++ err = -E2BIG; + /* prevent zero size kmalloc and check for u32 overflow */ + if (htab->n_buckets == 0 || +- htab->n_buckets > U32_MAX / sizeof(struct hlist_head)) ++ htab->n_buckets > U32_MAX / sizeof(struct bucket)) + goto free_htab; + +- if ((u64) htab->n_buckets * sizeof(struct hlist_head) + +- (u64) htab->elem_size * htab->map.max_entries >= +- U32_MAX - PAGE_SIZE) +- /* make sure page count doesn't overflow */ +- goto free_htab; ++ cost = (u64) htab->n_buckets * sizeof(struct bucket) + ++ (u64) htab->elem_size * htab->map.max_entries; + +- htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + +- htab->elem_size * htab->map.max_entries, +- PAGE_SIZE) >> PAGE_SHIFT; ++ if (percpu) ++ cost += (u64) round_up(htab->map.value_size, 8) * ++ num_possible_cpus() * htab->map.max_entries; ++ else ++ cost += (u64) htab->elem_size * num_possible_cpus(); ++ ++ /* if map size is larger than memlock limit, reject it */ ++ err = bpf_map_charge_init(&htab->map.memory, cost); ++ if (err) ++ goto free_htab; + + err = -ENOMEM; +- htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), +- GFP_USER | __GFP_NOWARN); ++ htab->buckets = bpf_map_area_alloc(htab->n_buckets * ++ sizeof(struct bucket), ++ htab->map.numa_node); ++ if (!htab->buckets) ++ goto free_charge; ++ ++ if (htab->map.map_flags & BPF_F_ZERO_SEED) ++ htab->hashrnd = 0; ++ else ++ htab->hashrnd = get_random_int(); + +- if (!htab->buckets) { +- htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head)); +- if (!htab->buckets) +- goto free_htab; ++ for (i = 0; i < htab->n_buckets; i++) { ++ INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); ++ raw_spin_lock_init(&htab->buckets[i].lock); + } + +- for (i = 0; i < htab->n_buckets; i++) +- INIT_HLIST_HEAD(&htab->buckets[i]); +- +- raw_spin_lock_init(&htab->lock); +- htab->count = 0; ++ if (prealloc) { ++ err = prealloc_init(htab); ++ if (err) ++ goto free_buckets; ++ ++ if (!percpu && !lru) { ++ /* lru itself can remove the least used element, so ++ * there is no need for an extra elem during map_update. 
++ */ ++ err = alloc_extra_elems(htab); ++ if (err) ++ goto free_prealloc; ++ } ++ } + + return &htab->map; + ++free_prealloc: ++ prealloc_destroy(htab); ++free_buckets: ++ bpf_map_area_free(htab->buckets); ++free_charge: ++ bpf_map_charge_finish(&htab->map.memory); + free_htab: + kfree(htab); + return ERR_PTR(err); + } + +-static inline u32 htab_map_hash(const void *key, u32 key_len) ++static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd) + { +- return jhash(key, key_len, 0); ++ return jhash(key, key_len, hashrnd); + } + +-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) ++static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) + { + return &htab->buckets[hash & (htab->n_buckets - 1)]; + } + +-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, ++static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) ++{ ++ return &__select_bucket(htab, hash)->head; ++} ++ ++/* this lookup function can only be called with bucket lock taken */ ++static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, + void *key, u32 key_size) + { ++ struct hlist_nulls_node *n; + struct htab_elem *l; + +- hlist_for_each_entry_rcu(l, head, hash_node) ++ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) + if (l->hash == hash && !memcmp(&l->key, key, key_size)) + return l; + + return NULL; + } + +-/* Called from syscall or from eBPF program */ +-static void *htab_map_lookup_elem(struct bpf_map *map, void *key) ++/* can be called without bucket lock. it will repeat the loop in ++ * the unlikely event when elements moved from one bucket into another ++ * while link list is being walked ++ */ ++static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, ++ u32 hash, void *key, ++ u32 key_size, u32 n_buckets) ++{ ++ struct hlist_nulls_node *n; ++ struct htab_elem *l; ++ ++again: ++ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) ++ if (l->hash == hash && !memcmp(&l->key, key, key_size)) ++ return l; ++ ++ if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) ++ goto again; ++ ++ return NULL; ++} ++ ++/* Called from syscall or from eBPF program directly, so ++ * arguments have to match bpf_map_lookup_elem() exactly. ++ * The return value is adjusted by BPF instructions ++ * in htab_map_gen_lookup(). ++ */ ++static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) + { + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +- struct hlist_head *head; ++ struct hlist_nulls_head *head; + struct htab_elem *l; + u32 hash, key_size; + +@@ -150,11 +470,18 @@ static void *htab_map_lookup_elem(struct + + key_size = map->key_size; + +- hash = htab_map_hash(key, key_size); ++ hash = htab_map_hash(key, key_size, htab->hashrnd); + + head = select_bucket(htab, hash); + +- l = lookup_elem_raw(head, hash, key, key_size); ++ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); ++ ++ return l; ++} ++ ++static void *htab_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ struct htab_elem *l = __htab_map_lookup_elem(map, key); + + if (l) + return l->key + round_up(map->key_size, 8); +@@ -162,33 +489,138 @@ static void *htab_map_lookup_elem(struct + return NULL; + } + ++/* inline bpf_map_lookup_elem() call. 
++ * Instead of: ++ * bpf_prog ++ * bpf_map_lookup_elem ++ * map->ops->map_lookup_elem ++ * htab_map_lookup_elem ++ * __htab_map_lookup_elem ++ * do: ++ * bpf_prog ++ * __htab_map_lookup_elem ++ */ ++static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) ++{ ++ struct bpf_insn *insn = insn_buf; ++ const int ret = BPF_REG_0; ++ ++ BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, ++ (void *(*)(struct bpf_map *map, void *key))NULL)); ++ *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); ++ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); ++ *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, ++ offsetof(struct htab_elem, key) + ++ round_up(map->key_size, 8)); ++ return insn - insn_buf; ++} ++ ++static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, ++ void *key, const bool mark) ++{ ++ struct htab_elem *l = __htab_map_lookup_elem(map, key); ++ ++ if (l) { ++ if (mark) ++ bpf_lru_node_set_ref(&l->lru_node); ++ return l->key + round_up(map->key_size, 8); ++ } ++ ++ return NULL; ++} ++ ++static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ return __htab_lru_map_lookup_elem(map, key, true); ++} ++ ++static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) ++{ ++ return __htab_lru_map_lookup_elem(map, key, false); ++} ++ ++static u32 htab_lru_map_gen_lookup(struct bpf_map *map, ++ struct bpf_insn *insn_buf) ++{ ++ struct bpf_insn *insn = insn_buf; ++ const int ret = BPF_REG_0; ++ const int ref_reg = BPF_REG_1; ++ ++ BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, ++ (void *(*)(struct bpf_map *map, void *key))NULL)); ++ *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); ++ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); ++ *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, ++ offsetof(struct htab_elem, lru_node) + ++ offsetof(struct bpf_lru_node, ref)); ++ *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1); ++ *insn++ = BPF_ST_MEM(BPF_B, ret, ++ offsetof(struct htab_elem, lru_node) + ++ offsetof(struct bpf_lru_node, ref), ++ 1); ++ *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, ++ offsetof(struct htab_elem, key) + ++ round_up(map->key_size, 8)); ++ return insn - insn_buf; ++} ++ ++/* It is called from the bpf_lru_list when the LRU needs to delete ++ * older elements from the htab. 
++ */ ++static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) ++{ ++ struct bpf_htab *htab = (struct bpf_htab *)arg; ++ struct htab_elem *l = NULL, *tgt_l; ++ struct hlist_nulls_head *head; ++ struct hlist_nulls_node *n; ++ unsigned long flags; ++ struct bucket *b; ++ ++ tgt_l = container_of(node, struct htab_elem, lru_node); ++ b = __select_bucket(htab, tgt_l->hash); ++ head = &b->head; ++ ++ raw_spin_lock_irqsave(&b->lock, flags); ++ ++ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) ++ if (l == tgt_l) { ++ hlist_nulls_del_rcu(&l->hash_node); ++ break; ++ } ++ ++ raw_spin_unlock_irqrestore(&b->lock, flags); ++ ++ return l == tgt_l; ++} ++ + /* Called from syscall */ + static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) + { + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +- struct hlist_head *head; ++ struct hlist_nulls_head *head; + struct htab_elem *l, *next_l; + u32 hash, key_size; +- int i; ++ int i = 0; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + key_size = map->key_size; + +- hash = htab_map_hash(key, key_size); ++ if (!key) ++ goto find_first_elem; ++ ++ hash = htab_map_hash(key, key_size, htab->hashrnd); + + head = select_bucket(htab, hash); + + /* lookup the key */ +- l = lookup_elem_raw(head, hash, key, key_size); ++ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); + +- if (!l) { +- i = 0; ++ if (!l) + goto find_first_elem; +- } + + /* key was found, get next key in the same bucket */ +- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), ++ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), + struct htab_elem, hash_node); + + if (next_l) { +@@ -207,7 +639,7 @@ find_first_elem: + head = select_bucket(htab, i); + + /* pick first element in the bucket */ +- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), ++ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), + struct htab_elem, hash_node); + if (next_l) { + /* if it's not empty, just return it */ +@@ -216,90 +648,491 @@ find_first_elem: + } + } + +- /* itereated over all buckets and all elements */ ++ /* iterated over all buckets and all elements */ + return -ENOENT; + } + ++static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) ++{ ++ if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) ++ free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); ++ kfree(l); ++} ++ ++static void htab_elem_free_rcu(struct rcu_head *head) ++{ ++ struct htab_elem *l = container_of(head, struct htab_elem, rcu); ++ struct bpf_htab *htab = l->htab; ++ ++ htab_elem_free(htab, l); ++} ++ ++static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) ++{ ++ struct bpf_map *map = &htab->map; ++ void *ptr; ++ ++ if (map->ops->map_fd_put_ptr) { ++ ptr = fd_htab_map_get_ptr(map, l); ++ map->ops->map_fd_put_ptr(ptr); ++ } ++} ++ ++static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) ++{ ++ htab_put_fd_value(htab, l); ++ ++ if (htab_is_prealloc(htab)) { ++ __pcpu_freelist_push(&htab->freelist, &l->fnode); ++ } else { ++ atomic_dec(&htab->count); ++ l->htab = htab; ++ call_rcu(&l->rcu, htab_elem_free_rcu); ++ } ++} ++ ++static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, ++ void *value, bool onallcpus) ++{ ++ if (!onallcpus) { ++ /* copy true value_size bytes */ ++ memcpy(this_cpu_ptr(pptr), value, htab->map.value_size); ++ } else { ++ u32 size = round_up(htab->map.value_size, 8); ++ int 
off = 0, cpu; ++ ++ for_each_possible_cpu(cpu) { ++ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), ++ value + off, size); ++ off += size; ++ } ++ } ++} ++ ++static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, ++ void *value, bool onallcpus) ++{ ++ /* When using prealloc and not setting the initial value on all cpus, ++ * zero-fill element values for other cpus (just as what happens when ++ * not using prealloc). Otherwise, bpf program has no way to ensure ++ * known initial values for cpus other than current one ++ * (onallcpus=false always when coming from bpf prog). ++ */ ++ if (htab_is_prealloc(htab) && !onallcpus) { ++ u32 size = round_up(htab->map.value_size, 8); ++ int current_cpu = raw_smp_processor_id(); ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ if (cpu == current_cpu) ++ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value, ++ size); ++ else ++ memset(per_cpu_ptr(pptr, cpu), 0, size); ++ } ++ } else { ++ pcpu_copy_value(htab, pptr, value, onallcpus); ++ } ++} ++ ++static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) ++{ ++ return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS && ++ BITS_PER_LONG == 64; ++} ++ ++static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, ++ void *value, u32 key_size, u32 hash, ++ bool percpu, bool onallcpus, ++ struct htab_elem *old_elem) ++{ ++ u32 size = htab->map.value_size; ++ bool prealloc = htab_is_prealloc(htab); ++ struct htab_elem *l_new, **pl_new; ++ void __percpu *pptr; ++ ++ if (prealloc) { ++ if (old_elem) { ++ /* if we're updating the existing element, ++ * use per-cpu extra elems to avoid freelist_pop/push ++ */ ++ pl_new = this_cpu_ptr(htab->extra_elems); ++ l_new = *pl_new; ++ htab_put_fd_value(htab, old_elem); ++ *pl_new = old_elem; ++ } else { ++ struct pcpu_freelist_node *l; ++ ++ l = __pcpu_freelist_pop(&htab->freelist); ++ if (!l) ++ return ERR_PTR(-E2BIG); ++ l_new = container_of(l, struct htab_elem, fnode); ++ } ++ } else { ++ if (atomic_inc_return(&htab->count) > htab->map.max_entries) ++ if (!old_elem) { ++ /* when map is full and update() is replacing ++ * old element, it's ok to allocate, since ++ * old element will be freed immediately. 
++ * Otherwise return an error ++ */ ++ l_new = ERR_PTR(-E2BIG); ++ goto dec_count; ++ } ++ l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, ++ htab->map.numa_node); ++ if (!l_new) { ++ l_new = ERR_PTR(-ENOMEM); ++ goto dec_count; ++ } ++ check_and_init_map_lock(&htab->map, ++ l_new->key + round_up(key_size, 8)); ++ } ++ ++ memcpy(l_new->key, key, key_size); ++ if (percpu) { ++ size = round_up(size, 8); ++ if (prealloc) { ++ pptr = htab_elem_get_ptr(l_new, key_size); ++ } else { ++ /* alloc_percpu zero-fills */ ++ pptr = __alloc_percpu_gfp(size, 8, ++ GFP_ATOMIC | __GFP_NOWARN); ++ if (!pptr) { ++ kfree(l_new); ++ l_new = ERR_PTR(-ENOMEM); ++ goto dec_count; ++ } ++ } ++ ++ pcpu_init_value(htab, pptr, value, onallcpus); ++ ++ if (!prealloc) ++ htab_elem_set_ptr(l_new, key_size, pptr); ++ } else if (fd_htab_map_needs_adjust(htab)) { ++ size = round_up(size, 8); ++ memcpy(l_new->key + round_up(key_size, 8), value, size); ++ } else { ++ copy_map_value(&htab->map, ++ l_new->key + round_up(key_size, 8), ++ value); ++ } ++ ++ l_new->hash = hash; ++ return l_new; ++dec_count: ++ atomic_dec(&htab->count); ++ return l_new; ++} ++ ++static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, ++ u64 map_flags) ++{ ++ if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) ++ /* elem already exists */ ++ return -EEXIST; ++ ++ if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) ++ /* elem doesn't exist, cannot update it */ ++ return -ENOENT; ++ ++ return 0; ++} ++ + /* Called from syscall or from eBPF program */ + static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) + { + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +- struct htab_elem *l_new, *l_old; +- struct hlist_head *head; ++ struct htab_elem *l_new = NULL, *l_old; ++ struct hlist_nulls_head *head; + unsigned long flags; +- u32 key_size; ++ struct bucket *b; ++ u32 key_size, hash; + int ret; + +- if (map_flags > BPF_EXIST) ++ if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) + /* unknown flags */ + return -EINVAL; + + WARN_ON_ONCE(!rcu_read_lock_held()); + +- /* allocate new element outside of lock */ +- l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); +- if (!l_new) +- return -ENOMEM; +- + key_size = map->key_size; + +- memcpy(l_new->key, key, key_size); +- memcpy(l_new->key + round_up(key_size, 8), value, map->value_size); ++ hash = htab_map_hash(key, key_size, htab->hashrnd); ++ ++ b = __select_bucket(htab, hash); ++ head = &b->head; + +- l_new->hash = htab_map_hash(l_new->key, key_size); ++ if (unlikely(map_flags & BPF_F_LOCK)) { ++ if (unlikely(!map_value_has_spin_lock(map))) ++ return -EINVAL; ++ /* find an element without taking the bucket lock */ ++ l_old = lookup_nulls_elem_raw(head, hash, key, key_size, ++ htab->n_buckets); ++ ret = check_flags(htab, l_old, map_flags); ++ if (ret) ++ return ret; ++ if (l_old) { ++ /* grab the element lock and update value in place */ ++ copy_map_value_locked(map, ++ l_old->key + round_up(key_size, 8), ++ value, false); ++ return 0; ++ } ++ /* fall through, grab the bucket lock and lookup again. ++ * 99.9% chance that the element won't be found, ++ * but second lookup under lock has to be done. 
++ */ ++ } + + /* bpf_map_update_elem() can be called in_irq() */ +- raw_spin_lock_irqsave(&htab->lock, flags); ++ raw_spin_lock_irqsave(&b->lock, flags); + +- head = select_bucket(htab, l_new->hash); ++ l_old = lookup_elem_raw(head, hash, key, key_size); + +- l_old = lookup_elem_raw(head, l_new->hash, key, key_size); ++ ret = check_flags(htab, l_old, map_flags); ++ if (ret) ++ goto err; + +- if (!l_old && unlikely(htab->count >= map->max_entries)) { +- /* if elem with this 'key' doesn't exist and we've reached +- * max_entries limit, fail insertion of new elem ++ if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { ++ /* first lookup without the bucket lock didn't find the element, ++ * but second lookup with the bucket lock found it. ++ * This case is highly unlikely, but has to be dealt with: ++ * grab the element lock in addition to the bucket lock ++ * and update element in place + */ +- ret = -E2BIG; ++ copy_map_value_locked(map, ++ l_old->key + round_up(key_size, 8), ++ value, false); ++ ret = 0; + goto err; + } + +- if (l_old && map_flags == BPF_NOEXIST) { +- /* elem already exists */ +- ret = -EEXIST; ++ l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, ++ l_old); ++ if (IS_ERR(l_new)) { ++ /* all pre-allocated elements are in use or memory exhausted */ ++ ret = PTR_ERR(l_new); + goto err; + } + +- if (!l_old && map_flags == BPF_EXIST) { +- /* elem doesn't exist, cannot update it */ +- ret = -ENOENT; +- goto err; ++ /* add new element to the head of the list, so that ++ * concurrent search will find it before old elem ++ */ ++ hlist_nulls_add_head_rcu(&l_new->hash_node, head); ++ if (l_old) { ++ hlist_nulls_del_rcu(&l_old->hash_node); ++ if (!htab_is_prealloc(htab)) ++ free_htab_elem(htab, l_old); + } ++ ret = 0; ++err: ++ raw_spin_unlock_irqrestore(&b->lock, flags); ++ return ret; ++} + +- /* add new element to the head of the list, so that concurrent +- * search will find it before old elem ++static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, ++ u64 map_flags) ++{ ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++ struct htab_elem *l_new, *l_old = NULL; ++ struct hlist_nulls_head *head; ++ unsigned long flags; ++ struct bucket *b; ++ u32 key_size, hash; ++ int ret; ++ ++ if (unlikely(map_flags > BPF_EXIST)) ++ /* unknown flags */ ++ return -EINVAL; ++ ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++ ++ key_size = map->key_size; ++ ++ hash = htab_map_hash(key, key_size, htab->hashrnd); ++ ++ b = __select_bucket(htab, hash); ++ head = &b->head; ++ ++ /* For LRU, we need to alloc before taking bucket's ++ * spinlock because getting free nodes from LRU may need ++ * to remove older elements from htab and this removal ++ * operation will need a bucket lock. 
+ */ +- hlist_add_head_rcu(&l_new->hash_node, head); ++ l_new = prealloc_lru_pop(htab, key, hash); ++ if (!l_new) ++ return -ENOMEM; ++ memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size); ++ ++ /* bpf_map_update_elem() can be called in_irq() */ ++ raw_spin_lock_irqsave(&b->lock, flags); ++ ++ l_old = lookup_elem_raw(head, hash, key, key_size); ++ ++ ret = check_flags(htab, l_old, map_flags); ++ if (ret) ++ goto err; ++ ++ /* add new element to the head of the list, so that ++ * concurrent search will find it before old elem ++ */ ++ hlist_nulls_add_head_rcu(&l_new->hash_node, head); ++ if (l_old) { ++ bpf_lru_node_set_ref(&l_new->lru_node); ++ hlist_nulls_del_rcu(&l_old->hash_node); ++ } ++ ret = 0; ++ ++err: ++ raw_spin_unlock_irqrestore(&b->lock, flags); ++ ++ if (ret) ++ bpf_lru_push_free(&htab->lru, &l_new->lru_node); ++ else if (l_old) ++ bpf_lru_push_free(&htab->lru, &l_old->lru_node); ++ ++ return ret; ++} ++ ++static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, ++ void *value, u64 map_flags, ++ bool onallcpus) ++{ ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++ struct htab_elem *l_new = NULL, *l_old; ++ struct hlist_nulls_head *head; ++ unsigned long flags; ++ struct bucket *b; ++ u32 key_size, hash; ++ int ret; ++ ++ if (unlikely(map_flags > BPF_EXIST)) ++ /* unknown flags */ ++ return -EINVAL; ++ ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++ ++ key_size = map->key_size; ++ ++ hash = htab_map_hash(key, key_size, htab->hashrnd); ++ ++ b = __select_bucket(htab, hash); ++ head = &b->head; ++ ++ /* bpf_map_update_elem() can be called in_irq() */ ++ raw_spin_lock_irqsave(&b->lock, flags); ++ ++ l_old = lookup_elem_raw(head, hash, key, key_size); ++ ++ ret = check_flags(htab, l_old, map_flags); ++ if (ret) ++ goto err; ++ + if (l_old) { +- hlist_del_rcu(&l_old->hash_node); +- kfree_rcu(l_old, rcu); ++ /* per-cpu hash map can update value in-place */ ++ pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), ++ value, onallcpus); + } else { +- htab->count++; ++ l_new = alloc_htab_elem(htab, key, value, key_size, ++ hash, true, onallcpus, NULL); ++ if (IS_ERR(l_new)) { ++ ret = PTR_ERR(l_new); ++ goto err; ++ } ++ hlist_nulls_add_head_rcu(&l_new->hash_node, head); + } +- raw_spin_unlock_irqrestore(&htab->lock, flags); ++ ret = 0; ++err: ++ raw_spin_unlock_irqrestore(&b->lock, flags); ++ return ret; ++} + +- return 0; ++static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, ++ void *value, u64 map_flags, ++ bool onallcpus) ++{ ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++ struct htab_elem *l_new = NULL, *l_old; ++ struct hlist_nulls_head *head; ++ unsigned long flags; ++ struct bucket *b; ++ u32 key_size, hash; ++ int ret; ++ ++ if (unlikely(map_flags > BPF_EXIST)) ++ /* unknown flags */ ++ return -EINVAL; ++ ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++ ++ key_size = map->key_size; ++ ++ hash = htab_map_hash(key, key_size, htab->hashrnd); ++ ++ b = __select_bucket(htab, hash); ++ head = &b->head; ++ ++ /* For LRU, we need to alloc before taking bucket's ++ * spinlock because LRU's elem alloc may need ++ * to remove older elem from htab and this removal ++ * operation will need a bucket lock. 
++ */ ++ if (map_flags != BPF_EXIST) { ++ l_new = prealloc_lru_pop(htab, key, hash); ++ if (!l_new) ++ return -ENOMEM; ++ } ++ ++ /* bpf_map_update_elem() can be called in_irq() */ ++ raw_spin_lock_irqsave(&b->lock, flags); ++ ++ l_old = lookup_elem_raw(head, hash, key, key_size); ++ ++ ret = check_flags(htab, l_old, map_flags); ++ if (ret) ++ goto err; ++ ++ if (l_old) { ++ bpf_lru_node_set_ref(&l_old->lru_node); ++ ++ /* per-cpu hash map can update value in-place */ ++ pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), ++ value, onallcpus); ++ } else { ++ pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), ++ value, onallcpus); ++ hlist_nulls_add_head_rcu(&l_new->hash_node, head); ++ l_new = NULL; ++ } ++ ret = 0; + err: +- raw_spin_unlock_irqrestore(&htab->lock, flags); +- kfree(l_new); ++ raw_spin_unlock_irqrestore(&b->lock, flags); ++ if (l_new) ++ bpf_lru_push_free(&htab->lru, &l_new->lru_node); + return ret; + } + ++static int htab_percpu_map_update_elem(struct bpf_map *map, void *key, ++ void *value, u64 map_flags) ++{ ++ return __htab_percpu_map_update_elem(map, key, value, map_flags, false); ++} ++ ++static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, ++ void *value, u64 map_flags) ++{ ++ return __htab_lru_percpu_map_update_elem(map, key, value, map_flags, ++ false); ++} ++ + /* Called from syscall or from eBPF program */ + static int htab_map_delete_elem(struct bpf_map *map, void *key) + { + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +- struct hlist_head *head; ++ struct hlist_nulls_head *head; ++ struct bucket *b; + struct htab_elem *l; + unsigned long flags; + u32 hash, key_size; +@@ -309,22 +1142,54 @@ static int htab_map_delete_elem(struct b + + key_size = map->key_size; + +- hash = htab_map_hash(key, key_size); ++ hash = htab_map_hash(key, key_size, htab->hashrnd); ++ b = __select_bucket(htab, hash); ++ head = &b->head; + +- raw_spin_lock_irqsave(&htab->lock, flags); ++ raw_spin_lock_irqsave(&b->lock, flags); + +- head = select_bucket(htab, hash); ++ l = lookup_elem_raw(head, hash, key, key_size); ++ ++ if (l) { ++ hlist_nulls_del_rcu(&l->hash_node); ++ free_htab_elem(htab, l); ++ ret = 0; ++ } ++ ++ raw_spin_unlock_irqrestore(&b->lock, flags); ++ return ret; ++} ++ ++static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) ++{ ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++ struct hlist_nulls_head *head; ++ struct bucket *b; ++ struct htab_elem *l; ++ unsigned long flags; ++ u32 hash, key_size; ++ int ret = -ENOENT; ++ ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++ ++ key_size = map->key_size; ++ ++ hash = htab_map_hash(key, key_size, htab->hashrnd); ++ b = __select_bucket(htab, hash); ++ head = &b->head; ++ ++ raw_spin_lock_irqsave(&b->lock, flags); + + l = lookup_elem_raw(head, hash, key, key_size); + + if (l) { +- hlist_del_rcu(&l->hash_node); +- htab->count--; +- kfree_rcu(l, rcu); ++ hlist_nulls_del_rcu(&l->hash_node); + ret = 0; + } + +- raw_spin_unlock_irqrestore(&htab->lock, flags); ++ raw_spin_unlock_irqrestore(&b->lock, flags); ++ if (l) ++ bpf_lru_push_free(&htab->lru, &l->lru_node); + return ret; + } + +@@ -333,14 +1198,13 @@ static void delete_all_elements(struct b + int i; + + for (i = 0; i < htab->n_buckets; i++) { +- struct hlist_head *head = select_bucket(htab, i); +- struct hlist_node *n; ++ struct hlist_nulls_head *head = select_bucket(htab, i); ++ struct hlist_nulls_node *n; + struct htab_elem *l; + +- hlist_for_each_entry_safe(l, n, head, hash_node) { +- 
hlist_del_rcu(&l->hash_node); +- htab->count--; +- kfree(l); ++ hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { ++ hlist_nulls_del_rcu(&l->hash_node); ++ htab_elem_free(htab, l); + } + } + } +@@ -357,31 +1221,320 @@ static void htab_map_free(struct bpf_map + */ + synchronize_rcu(); + +- /* some of kfree_rcu() callbacks for elements of this map may not have +- * executed. It's ok. Proceed to free residual elements and map itself ++ /* some of free_htab_elem() callbacks for elements of this map may ++ * not have executed. Wait for them. + */ +- delete_all_elements(htab); +- kvfree(htab->buckets); ++ rcu_barrier(); ++ if (!htab_is_prealloc(htab)) ++ delete_all_elements(htab); ++ else ++ prealloc_destroy(htab); ++ ++ free_percpu(htab->extra_elems); ++ bpf_map_area_free(htab->buckets); + kfree(htab); + } + +-static const struct bpf_map_ops htab_ops = { ++static void htab_map_seq_show_elem(struct bpf_map *map, void *key, ++ struct seq_file *m) ++{ ++ void *value; ++ ++ rcu_read_lock(); ++ ++ value = htab_map_lookup_elem(map, key); ++ if (!value) { ++ rcu_read_unlock(); ++ return; ++ } ++ ++ btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); ++ seq_puts(m, ": "); ++ btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); ++ seq_puts(m, "\n"); ++ ++ rcu_read_unlock(); ++} ++ ++const struct bpf_map_ops htab_map_ops = { ++ .map_alloc_check = htab_map_alloc_check, + .map_alloc = htab_map_alloc, + .map_free = htab_map_free, + .map_get_next_key = htab_map_get_next_key, + .map_lookup_elem = htab_map_lookup_elem, + .map_update_elem = htab_map_update_elem, + .map_delete_elem = htab_map_delete_elem, ++ .map_gen_lookup = htab_map_gen_lookup, ++ .map_seq_show_elem = htab_map_seq_show_elem, + }; + +-static struct bpf_map_type_list htab_type __read_mostly = { +- .ops = &htab_ops, +- .type = BPF_MAP_TYPE_HASH, ++const struct bpf_map_ops htab_lru_map_ops = { ++ .map_alloc_check = htab_map_alloc_check, ++ .map_alloc = htab_map_alloc, ++ .map_free = htab_map_free, ++ .map_get_next_key = htab_map_get_next_key, ++ .map_lookup_elem = htab_lru_map_lookup_elem, ++ .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, ++ .map_update_elem = htab_lru_map_update_elem, ++ .map_delete_elem = htab_lru_map_delete_elem, ++ .map_gen_lookup = htab_lru_map_gen_lookup, ++ .map_seq_show_elem = htab_map_seq_show_elem, + }; + +-static int __init register_htab_map(void) ++/* Called from eBPF program */ ++static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) + { +- bpf_register_map_type(&htab_type); +- return 0; ++ struct htab_elem *l = __htab_map_lookup_elem(map, key); ++ ++ if (l) ++ return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); ++ else ++ return NULL; + } +-late_initcall(register_htab_map); ++ ++static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ struct htab_elem *l = __htab_map_lookup_elem(map, key); ++ ++ if (l) { ++ bpf_lru_node_set_ref(&l->lru_node); ++ return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); ++ } ++ ++ return NULL; ++} ++ ++int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) ++{ ++ struct htab_elem *l; ++ void __percpu *pptr; ++ int ret = -ENOENT; ++ int cpu, off = 0; ++ u32 size; ++ ++ /* per_cpu areas are zero-filled and bpf programs can only ++ * access 'value_size' of them, so copying rounded areas ++ * will not leak any kernel data ++ */ ++ size = round_up(map->value_size, 8); ++ rcu_read_lock(); ++ l = __htab_map_lookup_elem(map, key); ++ if (!l) ++ goto out; ++ /* We do not mark LRU map element 
here in order to not mess up ++ * eviction heuristics when user space does a map walk. ++ */ ++ pptr = htab_elem_get_ptr(l, map->key_size); ++ for_each_possible_cpu(cpu) { ++ bpf_long_memcpy(value + off, ++ per_cpu_ptr(pptr, cpu), size); ++ off += size; ++ } ++ ret = 0; ++out: ++ rcu_read_unlock(); ++ return ret; ++} ++ ++int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, ++ u64 map_flags) ++{ ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++ int ret; ++ ++ rcu_read_lock(); ++ if (htab_is_lru(htab)) ++ ret = __htab_lru_percpu_map_update_elem(map, key, value, ++ map_flags, true); ++ else ++ ret = __htab_percpu_map_update_elem(map, key, value, map_flags, ++ true); ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, ++ struct seq_file *m) ++{ ++ struct htab_elem *l; ++ void __percpu *pptr; ++ int cpu; ++ ++ rcu_read_lock(); ++ ++ l = __htab_map_lookup_elem(map, key); ++ if (!l) { ++ rcu_read_unlock(); ++ return; ++ } ++ ++ btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); ++ seq_puts(m, ": {\n"); ++ pptr = htab_elem_get_ptr(l, map->key_size); ++ for_each_possible_cpu(cpu) { ++ seq_printf(m, "\tcpu%d: ", cpu); ++ btf_type_seq_show(map->btf, map->btf_value_type_id, ++ per_cpu_ptr(pptr, cpu), m); ++ seq_puts(m, "\n"); ++ } ++ seq_puts(m, "}\n"); ++ ++ rcu_read_unlock(); ++} ++ ++const struct bpf_map_ops htab_percpu_map_ops = { ++ .map_alloc_check = htab_map_alloc_check, ++ .map_alloc = htab_map_alloc, ++ .map_free = htab_map_free, ++ .map_get_next_key = htab_map_get_next_key, ++ .map_lookup_elem = htab_percpu_map_lookup_elem, ++ .map_update_elem = htab_percpu_map_update_elem, ++ .map_delete_elem = htab_map_delete_elem, ++ .map_seq_show_elem = htab_percpu_map_seq_show_elem, ++}; ++ ++const struct bpf_map_ops htab_lru_percpu_map_ops = { ++ .map_alloc_check = htab_map_alloc_check, ++ .map_alloc = htab_map_alloc, ++ .map_free = htab_map_free, ++ .map_get_next_key = htab_map_get_next_key, ++ .map_lookup_elem = htab_lru_percpu_map_lookup_elem, ++ .map_update_elem = htab_lru_percpu_map_update_elem, ++ .map_delete_elem = htab_lru_map_delete_elem, ++ .map_seq_show_elem = htab_percpu_map_seq_show_elem, ++}; ++ ++static int fd_htab_map_alloc_check(union bpf_attr *attr) ++{ ++ if (attr->value_size != sizeof(u32)) ++ return -EINVAL; ++ return htab_map_alloc_check(attr); ++} ++ ++static void fd_htab_map_free(struct bpf_map *map) ++{ ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++ struct hlist_nulls_node *n; ++ struct hlist_nulls_head *head; ++ struct htab_elem *l; ++ int i; ++ ++ for (i = 0; i < htab->n_buckets; i++) { ++ head = select_bucket(htab, i); ++ ++ hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { ++ void *ptr = fd_htab_map_get_ptr(map, l); ++ ++ map->ops->map_fd_put_ptr(ptr); ++ } ++ } ++ ++ htab_map_free(map); ++} ++ ++/* only called from syscall */ ++int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) ++{ ++ void **ptr; ++ int ret = 0; ++ ++ if (!map->ops->map_fd_sys_lookup_elem) ++ return -ENOTSUPP; ++ ++ rcu_read_lock(); ++ ptr = htab_map_lookup_elem(map, key); ++ if (ptr) ++ *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr)); ++ else ++ ret = -ENOENT; ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++/* only called from syscall */ ++int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, ++ void *key, void *value, u64 map_flags) ++{ ++ void *ptr; ++ int ret; ++ u32 ufd = *(u32 *)value; ++ ++ 
ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); ++ if (IS_ERR(ptr)) ++ return PTR_ERR(ptr); ++ ++ ret = htab_map_update_elem(map, key, &ptr, map_flags); ++ if (ret) ++ map->ops->map_fd_put_ptr(ptr); ++ ++ return ret; ++} ++ ++static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) ++{ ++ struct bpf_map *map, *inner_map_meta; ++ ++ inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); ++ if (IS_ERR(inner_map_meta)) ++ return inner_map_meta; ++ ++ map = htab_map_alloc(attr); ++ if (IS_ERR(map)) { ++ bpf_map_meta_free(inner_map_meta); ++ return map; ++ } ++ ++ map->inner_map_meta = inner_map_meta; ++ ++ return map; ++} ++ ++static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ struct bpf_map **inner_map = htab_map_lookup_elem(map, key); ++ ++ if (!inner_map) ++ return NULL; ++ ++ return READ_ONCE(*inner_map); ++} ++ ++static u32 htab_of_map_gen_lookup(struct bpf_map *map, ++ struct bpf_insn *insn_buf) ++{ ++ struct bpf_insn *insn = insn_buf; ++ const int ret = BPF_REG_0; ++ ++ BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, ++ (void *(*)(struct bpf_map *map, void *key))NULL)); ++ *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); ++ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); ++ *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, ++ offsetof(struct htab_elem, key) + ++ round_up(map->key_size, 8)); ++ *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); ++ ++ return insn - insn_buf; ++} ++ ++static void htab_of_map_free(struct bpf_map *map) ++{ ++ bpf_map_meta_free(map->inner_map_meta); ++ fd_htab_map_free(map); ++} ++ ++const struct bpf_map_ops htab_of_maps_map_ops = { ++ .map_alloc_check = fd_htab_map_alloc_check, ++ .map_alloc = htab_of_map_alloc, ++ .map_free = htab_of_map_free, ++ .map_get_next_key = htab_map_get_next_key, ++ .map_lookup_elem = htab_of_map_lookup_elem, ++ .map_delete_elem = htab_map_delete_elem, ++ .map_fd_get_ptr = bpf_map_fd_get_ptr, ++ .map_fd_put_ptr = bpf_map_fd_put_ptr, ++ .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, ++ .map_gen_lookup = htab_of_map_gen_lookup, ++ .map_check_btf = map_check_no_btf, ++}; +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -1,21 +1,18 @@ ++// SPDX-License-Identifier: GPL-2.0-only + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of version 2 of the GNU General Public +- * License as published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, but +- * WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. + */ + #include + #include + #include + #include ++#include + #include + #include + #include ++#include ++#include ++ ++#include "../../lib/kstrtox.h" + + /* If kernel subsystem is allowing eBPF programs to call this function, + * inside its own verifier_ops->get_func_proto() callback it should return +@@ -26,48 +23,32 @@ + * if program is allowed to access maps, so check rcu_read_lock_held in + * all three functions. 
+ */ +-static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) + { +- /* verifier checked that R1 contains a valid pointer to bpf_map +- * and R2 points to a program stack and map->key_size bytes were +- * initialized +- */ +- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; +- void *key = (void *) (unsigned long) r2; +- void *value; +- + WARN_ON_ONCE(!rcu_read_lock_held()); +- +- value = map->ops->map_lookup_elem(map, key); +- +- /* lookup() returns either pointer to element value or NULL +- * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type +- */ +- return (unsigned long) value; ++ return (unsigned long) map->ops->map_lookup_elem(map, key); + } + + const struct bpf_func_proto bpf_map_lookup_elem_proto = { + .func = bpf_map_lookup_elem, + .gpl_only = false, ++ .pkt_access = true, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, + }; + +-static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, ++ void *, value, u64, flags) + { +- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; +- void *key = (void *) (unsigned long) r2; +- void *value = (void *) (unsigned long) r3; +- + WARN_ON_ONCE(!rcu_read_lock_held()); +- +- return map->ops->map_update_elem(map, key, value, r4); ++ return map->ops->map_update_elem(map, key, value, flags); + } + + const struct bpf_func_proto bpf_map_update_elem_proto = { + .func = bpf_map_update_elem, + .gpl_only = false, ++ .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, +@@ -75,33 +56,71 @@ const struct bpf_func_proto bpf_map_upda + .arg4_type = ARG_ANYTHING, + }; + +-static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) + { +- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; +- void *key = (void *) (unsigned long) r2; +- + WARN_ON_ONCE(!rcu_read_lock_held()); +- + return map->ops->map_delete_elem(map, key); + } + + const struct bpf_func_proto bpf_map_delete_elem_proto = { + .func = bpf_map_delete_elem, + .gpl_only = false, ++ .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, + }; + ++BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) ++{ ++ return map->ops->map_push_elem(map, value, flags); ++} ++ ++const struct bpf_func_proto bpf_map_push_elem_proto = { ++ .func = bpf_map_push_elem, ++ .gpl_only = false, ++ .pkt_access = true, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_CONST_MAP_PTR, ++ .arg2_type = ARG_PTR_TO_MAP_VALUE, ++ .arg3_type = ARG_ANYTHING, ++}; ++ ++BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) ++{ ++ return map->ops->map_pop_elem(map, value); ++} ++ ++const struct bpf_func_proto bpf_map_pop_elem_proto = { ++ .func = bpf_map_pop_elem, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_CONST_MAP_PTR, ++ .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, ++}; ++ ++BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) ++{ ++ return map->ops->map_peek_elem(map, value); ++} ++ ++const struct bpf_func_proto bpf_map_peek_elem_proto = { ++ .func = bpf_map_peek_elem, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_CONST_MAP_PTR, ++ .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, ++}; 
++ + const struct bpf_func_proto bpf_get_prandom_u32_proto = { + .func = bpf_user_rnd_u32, + .gpl_only = false, + .ret_type = RET_INTEGER, + }; + +-static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_0(bpf_get_smp_processor_id) + { +- return raw_smp_processor_id(); ++ return smp_processor_id(); + } + + const struct bpf_func_proto bpf_get_smp_processor_id_proto = { +@@ -110,7 +129,18 @@ const struct bpf_func_proto bpf_get_smp_ + .ret_type = RET_INTEGER, + }; + +-static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_0(bpf_get_numa_node_id) ++{ ++ return numa_node_id(); ++} ++ ++const struct bpf_func_proto bpf_get_numa_node_id_proto = { ++ .func = bpf_get_numa_node_id, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++}; ++ ++BPF_CALL_0(bpf_ktime_get_ns) + { + /* NMI safe access to clock monotonic */ + return ktime_get_mono_fast_ns(); +@@ -122,11 +152,11 @@ const struct bpf_func_proto bpf_ktime_ge + .ret_type = RET_INTEGER, + }; + +-static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_0(bpf_get_current_pid_tgid) + { + struct task_struct *task = current; + +- if (!task) ++ if (unlikely(!task)) + return -EINVAL; + + return (u64) task->tgid << 32 | task->pid; +@@ -138,18 +168,18 @@ const struct bpf_func_proto bpf_get_curr + .ret_type = RET_INTEGER, + }; + +-static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_0(bpf_get_current_uid_gid) + { + struct task_struct *task = current; + kuid_t uid; + kgid_t gid; + +- if (!task) ++ if (unlikely(!task)) + return -EINVAL; + + current_uid_gid(&uid, &gid); + return (u64) from_kgid(&init_user_ns, gid) << 32 | +- from_kuid(&init_user_ns, uid); ++ from_kuid(&init_user_ns, uid); + } + + const struct bpf_func_proto bpf_get_current_uid_gid_proto = { +@@ -158,22 +188,254 @@ const struct bpf_func_proto bpf_get_curr + .ret_type = RET_INTEGER, + }; + +-static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) ++BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) + { + struct task_struct *task = current; +- char *buf = (char *) (long) r1; + +- if (!task) +- return -EINVAL; ++ if (unlikely(!task)) ++ goto err_clear; ++ ++ strncpy(buf, task->comm, size); + +- strlcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm))); ++ /* Verifier guarantees that size > 0. For task->comm exceeding ++ * size, guarantee that buf is %NUL-terminated. Unconditionally ++ * done here to save the size test. 
++ */ ++ buf[size - 1] = 0; + return 0; ++err_clear: ++ memset(buf, 0, size); ++ return -EINVAL; + } + + const struct bpf_func_proto bpf_get_current_comm_proto = { + .func = bpf_get_current_comm, + .gpl_only = false, + .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_STACK, +- .arg2_type = ARG_CONST_STACK_SIZE, ++ .arg1_type = ARG_PTR_TO_UNINIT_MEM, ++ .arg2_type = ARG_CONST_SIZE, ++}; ++ ++#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) ++ ++static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) ++{ ++ arch_spinlock_t *l = (void *)lock; ++ union { ++ __u32 val; ++ arch_spinlock_t lock; ++ } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; ++ ++ compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); ++ BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); ++ BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); ++ arch_spin_lock(l); ++} ++ ++static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) ++{ ++ arch_spinlock_t *l = (void *)lock; ++ ++ arch_spin_unlock(l); ++} ++ ++#else ++ ++static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) ++{ ++ atomic_t *l = (void *)lock; ++ ++ BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); ++ do { ++ smp_cond_load_relaxed(&l->counter, !VAL); ++ } while (atomic_xchg(l, 1)); ++} ++ ++static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) ++{ ++ atomic_t *l = (void *)lock; ++ ++ atomic_set_release(l, 0); ++} ++ ++#endif ++ ++static DEFINE_PER_CPU(unsigned long, irqsave_flags); ++ ++notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ __bpf_spin_lock(lock); ++ __this_cpu_write(irqsave_flags, flags); ++ return 0; ++} ++ ++const struct bpf_func_proto bpf_spin_lock_proto = { ++ .func = bpf_spin_lock, ++ .gpl_only = false, ++ .ret_type = RET_VOID, ++ .arg1_type = ARG_PTR_TO_SPIN_LOCK, ++}; ++ ++notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) ++{ ++ unsigned long flags; ++ ++ flags = __this_cpu_read(irqsave_flags); ++ __bpf_spin_unlock(lock); ++ local_irq_restore(flags); ++ return 0; ++} ++ ++const struct bpf_func_proto bpf_spin_unlock_proto = { ++ .func = bpf_spin_unlock, ++ .gpl_only = false, ++ .ret_type = RET_VOID, ++ .arg1_type = ARG_PTR_TO_SPIN_LOCK, ++}; ++ ++void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, ++ bool lock_src) ++{ ++ struct bpf_spin_lock *lock; ++ ++ if (lock_src) ++ lock = src + map->spin_lock_off; ++ else ++ lock = dst + map->spin_lock_off; ++ preempt_disable(); ++ ____bpf_spin_lock(lock); ++ copy_map_value(map, dst, src); ++ ____bpf_spin_unlock(lock); ++ preempt_enable(); ++} ++ ++#define BPF_STRTOX_BASE_MASK 0x1F ++ ++static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, ++ unsigned long long *res, bool *is_negative) ++{ ++ unsigned int base = flags & BPF_STRTOX_BASE_MASK; ++ const char *cur_buf = buf; ++ size_t cur_len = buf_len; ++ unsigned int consumed; ++ size_t val_len; ++ char str[64]; ++ ++ if (!buf || !buf_len || !res || !is_negative) ++ return -EINVAL; ++ ++ if (base != 0 && base != 8 && base != 10 && base != 16) ++ return -EINVAL; ++ ++ if (flags & ~BPF_STRTOX_BASE_MASK) ++ return -EINVAL; ++ ++ while (cur_buf < buf + buf_len && isspace(*cur_buf)) ++ ++cur_buf; ++ ++ *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-'); ++ if (*is_negative) ++ ++cur_buf; ++ ++ consumed = cur_buf - buf; ++ cur_len -= consumed; ++ if (!cur_len) ++ return -EINVAL; ++ ++ cur_len = min(cur_len, sizeof(str) - 1); ++ memcpy(str, cur_buf, cur_len); ++ str[cur_len] = '\0'; ++ 
cur_buf = str; ++ ++ cur_buf = _parse_integer_fixup_radix(cur_buf, &base); ++ val_len = _parse_integer(cur_buf, base, res); ++ ++ if (val_len & KSTRTOX_OVERFLOW) ++ return -ERANGE; ++ ++ if (val_len == 0) ++ return -EINVAL; ++ ++ cur_buf += val_len; ++ consumed += cur_buf - str; ++ ++ return consumed; ++} ++ ++static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, ++ long long *res) ++{ ++ unsigned long long _res; ++ bool is_negative; ++ int err; ++ ++ err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); ++ if (err < 0) ++ return err; ++ if (is_negative) { ++ if ((long long)-_res > 0) ++ return -ERANGE; ++ *res = -_res; ++ } else { ++ if ((long long)_res < 0) ++ return -ERANGE; ++ *res = _res; ++ } ++ return err; ++} ++ ++BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, ++ long *, res) ++{ ++ long long _res; ++ int err; ++ ++ err = __bpf_strtoll(buf, buf_len, flags, &_res); ++ if (err < 0) ++ return err; ++ if (_res != (long)_res) ++ return -ERANGE; ++ *res = _res; ++ return err; ++} ++ ++const struct bpf_func_proto bpf_strtol_proto = { ++ .func = bpf_strtol, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_CONST_SIZE, ++ .arg3_type = ARG_ANYTHING, ++ .arg4_type = ARG_PTR_TO_LONG, ++}; ++ ++BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, ++ unsigned long *, res) ++{ ++ unsigned long long _res; ++ bool is_negative; ++ int err; ++ ++ err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); ++ if (err < 0) ++ return err; ++ if (is_negative) ++ return -EINVAL; ++ if (_res != (unsigned long)_res) ++ return -ERANGE; ++ *res = _res; ++ return err; ++} ++ ++const struct bpf_func_proto bpf_strtoul_proto = { ++ .func = bpf_strtoul, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_CONST_SIZE, ++ .arg3_type = ARG_ANYTHING, ++ .arg4_type = ARG_PTR_TO_LONG, + }; +--- a/kernel/bpf/inode.c ++++ b/kernel/bpf/inode.c +@@ -1,3 +1,4 @@ ++// SPDX-License-Identifier: GPL-2.0-only + /* + * Minimal file system backend for holding eBPF maps and programs, + * used by bpf(2) object pinning. +@@ -5,21 +6,19 @@ + * Authors: + * + * Daniel Borkmann +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * version 2 as published by the Free Software Foundation. 
+ */ + +-#include ++#include + #include + #include + #include + #include + #include + #include ++#include + #include + #include ++#include + + enum bpf_type { + BPF_TYPE_UNSPEC = 0, +@@ -87,6 +86,7 @@ static struct inode *bpf_get_inode(struc + switch (mode & S_IFMT) { + case S_IFDIR: + case S_IFREG: ++ case S_IFLNK: + break; + default: + return ERR_PTR(-EINVAL); +@@ -119,18 +119,20 @@ static int bpf_inode_type(const struct i + return 0; + } + +-static bool bpf_dname_reserved(const struct dentry *dentry) ++static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, ++ struct inode *dir) + { +- return strchr(dentry->d_name.name, '.'); ++ d_instantiate(dentry, inode); ++ dget(dentry); ++ ++ dir->i_mtime = CURRENT_TIME; ++ dir->i_ctime = dir->i_mtime; + } + + static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) + { + struct inode *inode; + +- if (bpf_dname_reserved(dentry)) +- return -EPERM; +- + inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); + if (IS_ERR(inode)) + return PTR_ERR(inode); +@@ -141,30 +143,30 @@ static int bpf_mkdir(struct inode *dir, + inc_nlink(inode); + inc_nlink(dir); + +- d_instantiate(dentry, inode); +- dget(dentry); +- ++ bpf_dentry_finalize(dentry, inode, dir); + return 0; + } + +-static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, +- umode_t mode, const struct inode_operations *iops) ++static int bpffs_obj_open(struct inode *inode, struct file *file) + { +- struct inode *inode; ++ return -EIO; ++} + +- if (bpf_dname_reserved(dentry)) +- return -EPERM; ++static const struct file_operations bpffs_obj_fops = { ++ .open = bpffs_obj_open, ++}; + +- inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); ++static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, ++ umode_t mode, const struct inode_operations *iops) ++{ ++ struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = iops; + inode->i_private = dentry->d_fsdata; + +- d_instantiate(dentry, inode); +- dget(dentry); +- ++ bpf_dentry_finalize(dentry, inode, dir); + return 0; + } + +@@ -187,11 +189,48 @@ static int bpf_mkobj(struct inode *dir, + } + } + ++static struct dentry * ++bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) ++{ ++ /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future ++ * extensions. 
++ */ ++ if (strchr(dentry->d_name.name, '.')) ++ return ERR_PTR(-EPERM); ++ ++ return simple_lookup(dir, dentry, flags); ++} ++ ++static int bpf_symlink(struct inode *dir, struct dentry *dentry, ++ const char *target) ++{ ++ char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); ++ struct inode *inode; ++ ++ if (!link) ++ return -ENOMEM; ++ ++ inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); ++ if (IS_ERR(inode)) { ++ kfree(link); ++ return PTR_ERR(inode); ++ } ++ ++ inode->i_op = &simple_symlink_inode_operations; ++ inode->i_link = link; ++ ++ bpf_dentry_finalize(dentry, inode, dir); ++ return 0; ++} ++ + static const struct inode_operations bpf_dir_iops = { +- .lookup = simple_lookup, ++ .lookup = bpf_lookup, + .mknod = bpf_mkobj, + .mkdir = bpf_mkdir, ++ .symlink = bpf_symlink, + .rmdir = simple_rmdir, ++ .rename = simple_rename, ++ .link = simple_link, + .unlink = simple_unlink, + }; + +@@ -256,7 +295,7 @@ out: + } + + static void *bpf_obj_do_get(const struct filename *pathname, +- enum bpf_type *type) ++ enum bpf_type *type, int flags) + { + struct inode *inode; + struct path path; +@@ -268,7 +307,7 @@ static void *bpf_obj_do_get(const struct + return ERR_PTR(ret); + + inode = d_backing_inode(path.dentry); +- ret = inode_permission(inode, MAY_WRITE); ++ ret = inode_permission(inode, ACC_MODE(flags)); + if (ret) + goto out; + +@@ -287,18 +326,23 @@ out: + return ERR_PTR(ret); + } + +-int bpf_obj_get_user(const char __user *pathname) ++int bpf_obj_get_user(const char __user *pathname, int flags) + { + enum bpf_type type = BPF_TYPE_UNSPEC; + struct filename *pname; + int ret = -ENOENT; ++ int f_flags; + void *raw; + ++ f_flags = bpf_get_file_flag(flags); ++ if (f_flags < 0) ++ return f_flags; ++ + pname = getname(pathname); + if (IS_ERR(pname)) + return PTR_ERR(pname); + +- raw = bpf_obj_do_get(pname, &type); ++ raw = bpf_obj_do_get(pname, &type, f_flags); + if (IS_ERR(raw)) { + ret = PTR_ERR(raw); + goto out; +@@ -307,7 +351,7 @@ int bpf_obj_get_user(const char __user * + if (type == BPF_TYPE_PROG) + ret = bpf_prog_new_fd(raw); + else if (type == BPF_TYPE_MAP) +- ret = bpf_map_new_fd(raw); ++ ret = bpf_map_new_fd(raw, f_flags); + else + goto out; + +@@ -318,29 +362,131 @@ out: + return ret; + } + +-static void bpf_evict_inode(struct inode *inode) ++static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) + { +- enum bpf_type type; ++ struct bpf_prog *prog; ++ int ret = inode_permission(inode, MAY_READ); ++ if (ret) ++ return ERR_PTR(ret); ++ ++ if (inode->i_op == &bpf_map_iops) ++ return ERR_PTR(-EINVAL); ++ if (inode->i_op != &bpf_prog_iops) ++ return ERR_PTR(-EACCES); + +- truncate_inode_pages_final(&inode->i_data); +- clear_inode(inode); ++ prog = inode->i_private; + ++ if (!bpf_prog_get_ok(prog, &type, false)) ++ return ERR_PTR(-EINVAL); ++ ++ return bpf_prog_inc(prog); ++} ++ ++struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) ++{ ++ struct bpf_prog *prog; ++ struct path path; ++ int ret = kern_path(name, LOOKUP_FOLLOW, &path); ++ if (ret) ++ return ERR_PTR(ret); ++ prog = __get_prog_inode(d_backing_inode(path.dentry), type); ++ if (!IS_ERR(prog)) ++ touch_atime(&path); ++ path_put(&path); ++ return prog; ++} ++EXPORT_SYMBOL(bpf_prog_get_type_path); ++ ++/* ++ * Display the mount options in /proc/mounts. 
++ */ ++static int bpf_show_options(struct seq_file *m, struct dentry *root) ++{ ++ umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; ++ ++ if (mode != S_IRWXUGO) ++ seq_printf(m, ",mode=%o", mode); ++ return 0; ++} ++ ++static void bpf_destroy_inode_deferred(struct rcu_head *head) ++{ ++ struct inode *inode = container_of(head, struct inode, i_rcu); ++ enum bpf_type type; ++ ++ if (S_ISLNK(inode->i_mode)) ++ kfree(inode->i_link); + if (!bpf_inode_type(inode, &type)) + bpf_any_put(inode->i_private, type); ++ free_inode_nonrcu(inode); ++} ++ ++static void bpf_destroy_inode(struct inode *inode) ++{ ++ call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred); + } + + static const struct super_operations bpf_super_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +- .evict_inode = bpf_evict_inode, ++ .show_options = bpf_show_options, ++ .destroy_inode = bpf_destroy_inode, ++}; ++ ++enum { ++ OPT_MODE, ++ OPT_ERR, ++}; ++ ++static const match_table_t bpf_mount_tokens = { ++ { OPT_MODE, "mode=%o" }, ++ { OPT_ERR, NULL }, ++}; ++ ++struct bpf_mount_opts { ++ umode_t mode; + }; + ++static int bpf_parse_options(char *data, struct bpf_mount_opts *opts) ++{ ++ substring_t args[MAX_OPT_ARGS]; ++ int option, token; ++ char *ptr; ++ ++ opts->mode = S_IRWXUGO; ++ ++ while ((ptr = strsep(&data, ",")) != NULL) { ++ if (!*ptr) ++ continue; ++ ++ token = match_token(ptr, bpf_mount_tokens, args); ++ switch (token) { ++ case OPT_MODE: ++ if (match_octal(&args[0], &option)) ++ return -EINVAL; ++ opts->mode = option & S_IALLUGO; ++ break; ++ /* We might like to report bad mount options here, but ++ * traditionally we've ignored all mount options, so we'd ++ * better continue to ignore non-existing options for bpf. ++ */ ++ } ++ } ++ ++ return 0; ++} ++ + static int bpf_fill_super(struct super_block *sb, void *data, int silent) + { + static struct tree_descr bpf_rfiles[] = { { "" } }; ++ struct bpf_mount_opts opts; + struct inode *inode; + int ret; + ++ ret = bpf_parse_options(data, &opts); ++ if (ret) ++ return ret; ++ + ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); + if (ret) + return ret; +@@ -350,7 +496,7 @@ static int bpf_fill_super(struct super_b + inode = sb->s_root->d_inode; + inode->i_op = &bpf_dir_iops; + inode->i_mode &= ~S_IALLUGO; +- inode->i_mode |= S_ISVTX | S_IRWXUGO; ++ inode->i_mode |= S_ISVTX | opts.mode; + + return 0; + } +@@ -368,8 +514,6 @@ static struct file_system_type bpf_fs_ty + .kill_sb = kill_litter_super, + }; + +-MODULE_ALIAS_FS("bpf"); +- + static int __init bpf_init(void) + { + int ret; +--- /dev/null ++++ b/kernel/bpf/local_storage.c +@@ -0,0 +1,600 @@ ++//SPDX-License-Identifier: GPL-2.0 ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); ++ ++#ifdef CONFIG_CGROUP_BPF ++ ++#define LOCAL_STORAGE_CREATE_FLAG_MASK \ ++ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) ++ ++struct bpf_cgroup_storage_map { ++ struct bpf_map map; ++ ++ spinlock_t lock; ++ struct bpf_prog *prog; ++ struct rb_root root; ++ struct list_head list; ++}; ++ ++static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map) ++{ ++ return container_of(map, struct bpf_cgroup_storage_map, map); ++} ++ ++static int bpf_cgroup_storage_key_cmp( ++ const struct bpf_cgroup_storage_key *key1, ++ const struct bpf_cgroup_storage_key *key2) ++{ ++ if (key1->cgroup_inode_id < key2->cgroup_inode_id) ++ return -1; ++ else if (key1->cgroup_inode_id 
> key2->cgroup_inode_id) ++ return 1; ++ else if (key1->attach_type < key2->attach_type) ++ return -1; ++ else if (key1->attach_type > key2->attach_type) ++ return 1; ++ return 0; ++} ++ ++static struct bpf_cgroup_storage *cgroup_storage_lookup( ++ struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key, ++ bool locked) ++{ ++ struct rb_root *root = &map->root; ++ struct rb_node *node; ++ ++ if (!locked) ++ spin_lock_bh(&map->lock); ++ ++ node = root->rb_node; ++ while (node) { ++ struct bpf_cgroup_storage *storage; ++ ++ storage = container_of(node, struct bpf_cgroup_storage, node); ++ ++ switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) { ++ case -1: ++ node = node->rb_left; ++ break; ++ case 1: ++ node = node->rb_right; ++ break; ++ default: ++ if (!locked) ++ spin_unlock_bh(&map->lock); ++ return storage; ++ } ++ } ++ ++ if (!locked) ++ spin_unlock_bh(&map->lock); ++ ++ return NULL; ++} ++ ++static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map, ++ struct bpf_cgroup_storage *storage) ++{ ++ struct rb_root *root = &map->root; ++ struct rb_node **new = &(root->rb_node), *parent = NULL; ++ ++ while (*new) { ++ struct bpf_cgroup_storage *this; ++ ++ this = container_of(*new, struct bpf_cgroup_storage, node); ++ ++ parent = *new; ++ switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) { ++ case -1: ++ new = &((*new)->rb_left); ++ break; ++ case 1: ++ new = &((*new)->rb_right); ++ break; ++ default: ++ return -EEXIST; ++ } ++ } ++ ++ rb_link_node(&storage->node, parent, new); ++ rb_insert_color(&storage->node, root); ++ ++ return 0; ++} ++ ++static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key) ++{ ++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); ++ struct bpf_cgroup_storage_key *key = _key; ++ struct bpf_cgroup_storage *storage; ++ ++ storage = cgroup_storage_lookup(map, key, false); ++ if (!storage) ++ return NULL; ++ ++ return &READ_ONCE(storage->buf)->data[0]; ++} ++ ++static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, ++ void *value, u64 flags) ++{ ++ struct bpf_cgroup_storage_key *key = _key; ++ struct bpf_cgroup_storage *storage; ++ struct bpf_storage_buffer *new; ++ ++ if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST))) ++ return -EINVAL; ++ ++ if (unlikely(flags & BPF_NOEXIST)) ++ return -EINVAL; ++ ++ if (unlikely((flags & BPF_F_LOCK) && ++ !map_value_has_spin_lock(map))) ++ return -EINVAL; ++ ++ storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, ++ key, false); ++ if (!storage) ++ return -ENOENT; ++ ++ if (flags & BPF_F_LOCK) { ++ copy_map_value_locked(map, storage->buf->data, value, false); ++ return 0; ++ } ++ ++ new = kmalloc_node(sizeof(struct bpf_storage_buffer) + ++ map->value_size, ++ __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN, ++ map->numa_node); ++ if (!new) ++ return -ENOMEM; ++ ++ memcpy(&new->data[0], value, map->value_size); ++ check_and_init_map_lock(map, new->data); ++ ++ new = xchg(&storage->buf, new); ++ kfree_rcu(new, rcu); ++ ++ return 0; ++} ++ ++int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key, ++ void *value) ++{ ++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); ++ struct bpf_cgroup_storage_key *key = _key; ++ struct bpf_cgroup_storage *storage; ++ int cpu, off = 0; ++ u32 size; ++ ++ rcu_read_lock(); ++ storage = cgroup_storage_lookup(map, key, false); ++ if (!storage) { ++ rcu_read_unlock(); ++ return -ENOENT; ++ } ++ ++ /* per_cpu areas are zero-filled and bpf programs can only ++ * access 'value_size' 
of them, so copying rounded areas ++ * will not leak any kernel data ++ */ ++ size = round_up(_map->value_size, 8); ++ for_each_possible_cpu(cpu) { ++ bpf_long_memcpy(value + off, ++ per_cpu_ptr(storage->percpu_buf, cpu), size); ++ off += size; ++ } ++ rcu_read_unlock(); ++ return 0; ++} ++ ++int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key, ++ void *value, u64 map_flags) ++{ ++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); ++ struct bpf_cgroup_storage_key *key = _key; ++ struct bpf_cgroup_storage *storage; ++ int cpu, off = 0; ++ u32 size; ++ ++ if (map_flags != BPF_ANY && map_flags != BPF_EXIST) ++ return -EINVAL; ++ ++ rcu_read_lock(); ++ storage = cgroup_storage_lookup(map, key, false); ++ if (!storage) { ++ rcu_read_unlock(); ++ return -ENOENT; ++ } ++ ++ /* the user space will provide round_up(value_size, 8) bytes that ++ * will be copied into per-cpu area. bpf programs can only access ++ * value_size of it. During lookup the same extra bytes will be ++ * returned or zeros which were zero-filled by percpu_alloc, ++ * so no kernel data leaks possible ++ */ ++ size = round_up(_map->value_size, 8); ++ for_each_possible_cpu(cpu) { ++ bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu), ++ value + off, size); ++ off += size; ++ } ++ rcu_read_unlock(); ++ return 0; ++} ++ ++static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key, ++ void *_next_key) ++{ ++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); ++ struct bpf_cgroup_storage_key *key = _key; ++ struct bpf_cgroup_storage_key *next = _next_key; ++ struct bpf_cgroup_storage *storage; ++ ++ spin_lock_bh(&map->lock); ++ ++ if (list_empty(&map->list)) ++ goto enoent; ++ ++ if (key) { ++ storage = cgroup_storage_lookup(map, key, true); ++ if (!storage) ++ goto enoent; ++ ++ storage = list_next_entry(storage, list); ++ if (!storage) ++ goto enoent; ++ } else { ++ storage = list_first_entry(&map->list, ++ struct bpf_cgroup_storage, list); ++ } ++ ++ spin_unlock_bh(&map->lock); ++ next->attach_type = storage->key.attach_type; ++ next->cgroup_inode_id = storage->key.cgroup_inode_id; ++ return 0; ++ ++enoent: ++ spin_unlock_bh(&map->lock); ++ return -ENOENT; ++} ++ ++static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) ++{ ++ int numa_node = bpf_map_attr_numa_node(attr); ++ struct bpf_cgroup_storage_map *map; ++ struct bpf_map_memory mem; ++ int ret; ++ ++ if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) ++ return ERR_PTR(-EINVAL); ++ ++ if (attr->value_size == 0) ++ return ERR_PTR(-EINVAL); ++ ++ if (attr->value_size > PAGE_SIZE) ++ return ERR_PTR(-E2BIG); ++ ++ if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK || ++ !bpf_map_flags_access_ok(attr->map_flags)) ++ return ERR_PTR(-EINVAL); ++ ++ if (attr->max_entries) ++ /* max_entries is not used and enforced to be 0 */ ++ return ERR_PTR(-EINVAL); ++ ++ ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map)); ++ if (ret < 0) ++ return ERR_PTR(ret); ++ ++ map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), ++ __GFP_ZERO | GFP_USER, numa_node); ++ if (!map) { ++ bpf_map_charge_finish(&mem); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ bpf_map_charge_move(&map->map.memory, &mem); ++ ++ /* copy mandatory map attributes */ ++ bpf_map_init_from_attr(&map->map, attr); ++ ++ spin_lock_init(&map->lock); ++ map->root = RB_ROOT; ++ INIT_LIST_HEAD(&map->list); ++ ++ return &map->map; ++} ++ ++static void cgroup_storage_map_free(struct bpf_map *_map) ++{ ++ struct bpf_cgroup_storage_map *map = 
map_to_storage(_map); ++ ++ WARN_ON(!RB_EMPTY_ROOT(&map->root)); ++ WARN_ON(!list_empty(&map->list)); ++ ++ kfree(map); ++} ++ ++static int cgroup_storage_delete_elem(struct bpf_map *map, void *key) ++{ ++ return -EINVAL; ++} ++ ++static int cgroup_storage_check_btf(const struct bpf_map *map, ++ const struct btf *btf, ++ const struct btf_type *key_type, ++ const struct btf_type *value_type) ++{ ++ struct btf_member *m; ++ u32 offset, size; ++ ++ /* Key is expected to be of struct bpf_cgroup_storage_key type, ++ * which is: ++ * struct bpf_cgroup_storage_key { ++ * __u64 cgroup_inode_id; ++ * __u32 attach_type; ++ * }; ++ */ ++ ++ /* ++ * Key_type must be a structure with two fields. ++ */ ++ if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT || ++ BTF_INFO_VLEN(key_type->info) != 2) ++ return -EINVAL; ++ ++ /* ++ * The first field must be a 64 bit integer at 0 offset. ++ */ ++ m = (struct btf_member *)(key_type + 1); ++ size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id); ++ if (!btf_member_is_reg_int(btf, key_type, m, 0, size)) ++ return -EINVAL; ++ ++ /* ++ * The second field must be a 32 bit integer at 64 bit offset. ++ */ ++ m++; ++ offset = offsetof(struct bpf_cgroup_storage_key, attach_type); ++ size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type); ++ if (!btf_member_is_reg_int(btf, key_type, m, offset, size)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key, ++ struct seq_file *m) ++{ ++ enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); ++ struct bpf_cgroup_storage_key *key = _key; ++ struct bpf_cgroup_storage *storage; ++ int cpu; ++ ++ rcu_read_lock(); ++ storage = cgroup_storage_lookup(map_to_storage(map), key, false); ++ if (!storage) { ++ rcu_read_unlock(); ++ return; ++ } ++ ++ btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); ++ stype = cgroup_storage_type(map); ++ if (stype == BPF_CGROUP_STORAGE_SHARED) { ++ seq_puts(m, ": "); ++ btf_type_seq_show(map->btf, map->btf_value_type_id, ++ &READ_ONCE(storage->buf)->data[0], m); ++ seq_puts(m, "\n"); ++ } else { ++ seq_puts(m, ": {\n"); ++ for_each_possible_cpu(cpu) { ++ seq_printf(m, "\tcpu%d: ", cpu); ++ btf_type_seq_show(map->btf, map->btf_value_type_id, ++ per_cpu_ptr(storage->percpu_buf, cpu), ++ m); ++ seq_puts(m, "\n"); ++ } ++ seq_puts(m, "}\n"); ++ } ++ rcu_read_unlock(); ++} ++ ++const struct bpf_map_ops cgroup_storage_map_ops = { ++ .map_alloc = cgroup_storage_map_alloc, ++ .map_free = cgroup_storage_map_free, ++ .map_get_next_key = cgroup_storage_get_next_key, ++ .map_lookup_elem = cgroup_storage_lookup_elem, ++ .map_update_elem = cgroup_storage_update_elem, ++ .map_delete_elem = cgroup_storage_delete_elem, ++ .map_check_btf = cgroup_storage_check_btf, ++ .map_seq_show_elem = cgroup_storage_seq_show_elem, ++}; ++ ++int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map) ++{ ++ enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map); ++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); ++ int ret = -EBUSY; ++ ++ spin_lock_bh(&map->lock); ++ ++ if (map->prog && map->prog != prog) ++ goto unlock; ++ if (prog->aux->cgroup_storage[stype] && ++ prog->aux->cgroup_storage[stype] != _map) ++ goto unlock; ++ ++ map->prog = prog; ++ prog->aux->cgroup_storage[stype] = _map; ++ ret = 0; ++unlock: ++ spin_unlock_bh(&map->lock); ++ ++ return ret; ++} ++ ++void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map) ++{ ++ enum bpf_cgroup_storage_type stype = 
cgroup_storage_type(_map); ++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); ++ ++ spin_lock_bh(&map->lock); ++ if (map->prog == prog) { ++ WARN_ON(prog->aux->cgroup_storage[stype] != _map); ++ map->prog = NULL; ++ prog->aux->cgroup_storage[stype] = NULL; ++ } ++ spin_unlock_bh(&map->lock); ++} ++ ++static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages) ++{ ++ size_t size; ++ ++ if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) { ++ size = sizeof(struct bpf_storage_buffer) + map->value_size; ++ *pages = round_up(sizeof(struct bpf_cgroup_storage) + size, ++ PAGE_SIZE) >> PAGE_SHIFT; ++ } else { ++ size = map->value_size; ++ *pages = round_up(round_up(size, 8) * num_possible_cpus(), ++ PAGE_SIZE) >> PAGE_SHIFT; ++ } ++ ++ return size; ++} ++ ++struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, ++ enum bpf_cgroup_storage_type stype) ++{ ++ struct bpf_cgroup_storage *storage; ++ struct bpf_map *map; ++ gfp_t flags; ++ size_t size; ++ u32 pages; ++ ++ map = prog->aux->cgroup_storage[stype]; ++ if (!map) ++ return NULL; ++ ++ size = bpf_cgroup_storage_calculate_size(map, &pages); ++ ++ if (bpf_map_charge_memlock(map, pages)) ++ return ERR_PTR(-EPERM); ++ ++ storage = kmalloc_node(sizeof(struct bpf_cgroup_storage), ++ __GFP_ZERO | GFP_USER, map->numa_node); ++ if (!storage) ++ goto enomem; ++ ++ flags = __GFP_ZERO | GFP_USER; ++ ++ if (stype == BPF_CGROUP_STORAGE_SHARED) { ++ storage->buf = kmalloc_node(size, flags, map->numa_node); ++ if (!storage->buf) ++ goto enomem; ++ check_and_init_map_lock(map, storage->buf->data); ++ } else { ++ storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags); ++ if (!storage->percpu_buf) ++ goto enomem; ++ } ++ ++ storage->map = (struct bpf_cgroup_storage_map *)map; ++ ++ return storage; ++ ++enomem: ++ bpf_map_uncharge_memlock(map, pages); ++ kfree(storage); ++ return ERR_PTR(-ENOMEM); ++} ++ ++static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu) ++{ ++ struct bpf_cgroup_storage *storage = ++ container_of(rcu, struct bpf_cgroup_storage, rcu); ++ ++ kfree(storage->buf); ++ kfree(storage); ++} ++ ++static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu) ++{ ++ struct bpf_cgroup_storage *storage = ++ container_of(rcu, struct bpf_cgroup_storage, rcu); ++ ++ free_percpu(storage->percpu_buf); ++ kfree(storage); ++} ++ ++void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage) ++{ ++ enum bpf_cgroup_storage_type stype; ++ struct bpf_map *map; ++ u32 pages; ++ ++ if (!storage) ++ return; ++ ++ map = &storage->map->map; ++ ++ bpf_cgroup_storage_calculate_size(map, &pages); ++ bpf_map_uncharge_memlock(map, pages); ++ ++ stype = cgroup_storage_type(map); ++ if (stype == BPF_CGROUP_STORAGE_SHARED) ++ call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu); ++ else ++ call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu); ++} ++ ++void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, ++ struct cgroup *cgroup, ++ enum bpf_attach_type type) ++{ ++ struct bpf_cgroup_storage_map *map; ++ ++ if (!storage) ++ return; ++ ++ storage->key.attach_type = type; ++ storage->key.cgroup_inode_id = cgroup->kn->id.id; ++ ++ map = storage->map; ++ ++ spin_lock_bh(&map->lock); ++ WARN_ON(cgroup_storage_insert(map, storage)); ++ list_add(&storage->list, &map->list); ++ spin_unlock_bh(&map->lock); ++} ++ ++void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage) ++{ ++ struct bpf_cgroup_storage_map *map; ++ struct rb_root *root; ++ ++ if (!storage) ++ return; ++ ++ map = 
storage->map; ++ ++ spin_lock_bh(&map->lock); ++ root = &map->root; ++ rb_erase(&storage->node, root); ++ ++ list_del(&storage->list); ++ spin_unlock_bh(&map->lock); ++} ++ ++#endif +--- /dev/null ++++ b/kernel/bpf/lpm_trie.c +@@ -0,0 +1,746 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Longest prefix match list implementation ++ * ++ * Copyright (c) 2016,2017 Daniel Mack ++ * Copyright (c) 2016 David Herrmann ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Intermediate node */ ++#define LPM_TREE_NODE_FLAG_IM BIT(0) ++ ++struct lpm_trie_node; ++ ++struct lpm_trie_node { ++ struct rcu_head rcu; ++ struct lpm_trie_node __rcu *child[2]; ++ u32 prefixlen; ++ u32 flags; ++ u8 data[0]; ++}; ++ ++struct lpm_trie { ++ struct bpf_map map; ++ struct lpm_trie_node __rcu *root; ++ size_t n_entries; ++ size_t max_prefixlen; ++ size_t data_size; ++ raw_spinlock_t lock; ++}; ++ ++/* This trie implements a longest prefix match algorithm that can be used to ++ * match IP addresses to a stored set of ranges. ++ * ++ * Data stored in @data of struct bpf_lpm_key and struct lpm_trie_node is ++ * interpreted as big endian, so data[0] stores the most significant byte. ++ * ++ * Match ranges are internally stored in instances of struct lpm_trie_node ++ * which each contain their prefix length as well as two pointers that may ++ * lead to more nodes containing more specific matches. Each node also stores ++ * a value that is defined by and returned to userspace via the update_elem ++ * and lookup functions. ++ * ++ * For instance, let's start with a trie that was created with a prefix length ++ * of 32, so it can be used for IPv4 addresses, and one single element that ++ * matches 192.168.0.0/16. The data array would hence contain ++ * [0xc0, 0xa8, 0x00, 0x00] in big-endian notation. This documentation will ++ * stick to IP-address notation for readability though. ++ * ++ * As the trie is empty initially, the new node (1) will be places as root ++ * node, denoted as (R) in the example below. As there are no other node, both ++ * child pointers are %NULL. ++ * ++ * +----------------+ ++ * | (1) (R) | ++ * | 192.168.0.0/16 | ++ * | value: 1 | ++ * | [0] [1] | ++ * +----------------+ ++ * ++ * Next, let's add a new node (2) matching 192.168.0.0/24. As there is already ++ * a node with the same data and a smaller prefix (ie, a less specific one), ++ * node (2) will become a child of (1). In child index depends on the next bit ++ * that is outside of what (1) matches, and that bit is 0, so (2) will be ++ * child[0] of (1): ++ * ++ * +----------------+ ++ * | (1) (R) | ++ * | 192.168.0.0/16 | ++ * | value: 1 | ++ * | [0] [1] | ++ * +----------------+ ++ * | ++ * +----------------+ ++ * | (2) | ++ * | 192.168.0.0/24 | ++ * | value: 2 | ++ * | [0] [1] | ++ * +----------------+ ++ * ++ * The child[1] slot of (1) could be filled with another node which has bit #17 ++ * (the next bit after the ones that (1) matches on) set to 1. For instance, ++ * 192.168.128.0/24: ++ * ++ * +----------------+ ++ * | (1) (R) | ++ * | 192.168.0.0/16 | ++ * | value: 1 | ++ * | [0] [1] | ++ * +----------------+ ++ * | | ++ * +----------------+ +------------------+ ++ * | (2) | | (3) | ++ * | 192.168.0.0/24 | | 192.168.128.0/24 | ++ * | value: 2 | | value: 3 | ++ * | [0] [1] | | [0] [1] | ++ * +----------------+ +------------------+ ++ * ++ * Let's add another node (4) to the game for 192.168.1.0/24. 
In order to place ++ * it, node (1) is looked at first, and because (4) of the semantics laid out ++ * above (bit #17 is 0), it would normally be attached to (1) as child[0]. ++ * However, that slot is already allocated, so a new node is needed in between. ++ * That node does not have a value attached to it and it will never be ++ * returned to users as result of a lookup. It is only there to differentiate ++ * the traversal further. It will get a prefix as wide as necessary to ++ * distinguish its two children: ++ * ++ * +----------------+ ++ * | (1) (R) | ++ * | 192.168.0.0/16 | ++ * | value: 1 | ++ * | [0] [1] | ++ * +----------------+ ++ * | | ++ * +----------------+ +------------------+ ++ * | (4) (I) | | (3) | ++ * | 192.168.0.0/23 | | 192.168.128.0/24 | ++ * | value: --- | | value: 3 | ++ * | [0] [1] | | [0] [1] | ++ * +----------------+ +------------------+ ++ * | | ++ * +----------------+ +----------------+ ++ * | (2) | | (5) | ++ * | 192.168.0.0/24 | | 192.168.1.0/24 | ++ * | value: 2 | | value: 5 | ++ * | [0] [1] | | [0] [1] | ++ * +----------------+ +----------------+ ++ * ++ * 192.168.1.1/32 would be a child of (5) etc. ++ * ++ * An intermediate node will be turned into a 'real' node on demand. In the ++ * example above, (4) would be re-used if 192.168.0.0/23 is added to the trie. ++ * ++ * A fully populated trie would have a height of 32 nodes, as the trie was ++ * created with a prefix length of 32. ++ * ++ * The lookup starts at the root node. If the current node matches and if there ++ * is a child that can be used to become more specific, the trie is traversed ++ * downwards. The last node in the traversal that is a non-intermediate one is ++ * returned. ++ */ ++ ++static inline int extract_bit(const u8 *data, size_t index) ++{ ++ return !!(data[index / 8] & (1 << (7 - (index % 8)))); ++} ++ ++/** ++ * longest_prefix_match() - determine the longest prefix ++ * @trie: The trie to get internal sizes from ++ * @node: The node to operate on ++ * @key: The key to compare to @node ++ * ++ * Determine the longest prefix of @node that matches the bits in @key. ++ */ ++static size_t longest_prefix_match(const struct lpm_trie *trie, ++ const struct lpm_trie_node *node, ++ const struct bpf_lpm_trie_key *key) ++{ ++ u32 limit = min(node->prefixlen, key->prefixlen); ++ u32 prefixlen = 0, i = 0; ++ ++ BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32)); ++ BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32)); ++ ++#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT) ++ ++ /* data_size >= 16 has very small probability. ++ * We do not use a loop for optimal code generation. 
++ */ ++ if (trie->data_size >= 8) { ++ u64 diff = be64_to_cpu(*(__be64 *)node->data ^ ++ *(__be64 *)key->data); ++ ++ prefixlen = 64 - fls64(diff); ++ if (prefixlen >= limit) ++ return limit; ++ if (diff) ++ return prefixlen; ++ i = 8; ++ } ++#endif ++ ++ while (trie->data_size >= i + 4) { ++ u32 diff = be32_to_cpu(*(__be32 *)&node->data[i] ^ ++ *(__be32 *)&key->data[i]); ++ ++ prefixlen += 32 - fls(diff); ++ if (prefixlen >= limit) ++ return limit; ++ if (diff) ++ return prefixlen; ++ i += 4; ++ } ++ ++ if (trie->data_size >= i + 2) { ++ u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^ ++ *(__be16 *)&key->data[i]); ++ ++ prefixlen += 16 - fls(diff); ++ if (prefixlen >= limit) ++ return limit; ++ if (diff) ++ return prefixlen; ++ i += 2; ++ } ++ ++ if (trie->data_size >= i + 1) { ++ prefixlen += 8 - fls(node->data[i] ^ key->data[i]); ++ ++ if (prefixlen >= limit) ++ return limit; ++ } ++ ++ return prefixlen; ++} ++ ++/* Called from syscall or from eBPF program */ ++static void *trie_lookup_elem(struct bpf_map *map, void *_key) ++{ ++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); ++ struct lpm_trie_node *node, *found = NULL; ++ struct bpf_lpm_trie_key *key = _key; ++ ++ /* Start walking the trie from the root node ... */ ++ ++ for (node = rcu_dereference(trie->root); node;) { ++ unsigned int next_bit; ++ size_t matchlen; ++ ++ /* Determine the longest prefix of @node that matches @key. ++ * If it's the maximum possible prefix for this trie, we have ++ * an exact match and can return it directly. ++ */ ++ matchlen = longest_prefix_match(trie, node, key); ++ if (matchlen == trie->max_prefixlen) { ++ found = node; ++ break; ++ } ++ ++ /* If the number of bits that match is smaller than the prefix ++ * length of @node, bail out and return the node we have seen ++ * last in the traversal (ie, the parent). ++ */ ++ if (matchlen < node->prefixlen) ++ break; ++ ++ /* Consider this node as return candidate unless it is an ++ * artificially added intermediate one. ++ */ ++ if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) ++ found = node; ++ ++ /* If the node match is fully satisfied, let's see if we can ++ * become more specific. Determine the next bit in the key and ++ * traverse down. 
++ */ ++ next_bit = extract_bit(key->data, node->prefixlen); ++ node = rcu_dereference(node->child[next_bit]); ++ } ++ ++ if (!found) ++ return NULL; ++ ++ return found->data + trie->data_size; ++} ++ ++static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, ++ const void *value) ++{ ++ struct lpm_trie_node *node; ++ size_t size = sizeof(struct lpm_trie_node) + trie->data_size; ++ ++ if (value) ++ size += trie->map.value_size; ++ ++ node = kmalloc_node(size, GFP_ATOMIC | __GFP_NOWARN, ++ trie->map.numa_node); ++ if (!node) ++ return NULL; ++ ++ node->flags = 0; ++ ++ if (value) ++ memcpy(node->data + trie->data_size, value, ++ trie->map.value_size); ++ ++ return node; ++} ++ ++/* Called from syscall or from eBPF program */ ++static int trie_update_elem(struct bpf_map *map, ++ void *_key, void *value, u64 flags) ++{ ++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); ++ struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; ++ struct lpm_trie_node __rcu **slot; ++ struct bpf_lpm_trie_key *key = _key; ++ unsigned long irq_flags; ++ unsigned int next_bit; ++ size_t matchlen = 0; ++ int ret = 0; ++ ++ if (unlikely(flags > BPF_EXIST)) ++ return -EINVAL; ++ ++ if (key->prefixlen > trie->max_prefixlen) ++ return -EINVAL; ++ ++ raw_spin_lock_irqsave(&trie->lock, irq_flags); ++ ++ /* Allocate and fill a new node */ ++ ++ if (trie->n_entries == trie->map.max_entries) { ++ ret = -ENOSPC; ++ goto out; ++ } ++ ++ new_node = lpm_trie_node_alloc(trie, value); ++ if (!new_node) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ trie->n_entries++; ++ ++ new_node->prefixlen = key->prefixlen; ++ RCU_INIT_POINTER(new_node->child[0], NULL); ++ RCU_INIT_POINTER(new_node->child[1], NULL); ++ memcpy(new_node->data, key->data, trie->data_size); ++ ++ /* Now find a slot to attach the new node. To do that, walk the tree ++ * from the root and match as many bits as possible for each node until ++ * we either find an empty slot or a slot that needs to be replaced by ++ * an intermediate node. ++ */ ++ slot = &trie->root; ++ ++ while ((node = rcu_dereference_protected(*slot, ++ lockdep_is_held(&trie->lock)))) { ++ matchlen = longest_prefix_match(trie, node, key); ++ ++ if (node->prefixlen != matchlen || ++ node->prefixlen == key->prefixlen || ++ node->prefixlen == trie->max_prefixlen) ++ break; ++ ++ next_bit = extract_bit(key->data, node->prefixlen); ++ slot = &node->child[next_bit]; ++ } ++ ++ /* If the slot is empty (a free child pointer or an empty root), ++ * simply assign the @new_node to that slot and be done. ++ */ ++ if (!node) { ++ rcu_assign_pointer(*slot, new_node); ++ goto out; ++ } ++ ++ /* If the slot we picked already exists, replace it with @new_node ++ * which already has the correct data array set. ++ */ ++ if (node->prefixlen == matchlen) { ++ new_node->child[0] = node->child[0]; ++ new_node->child[1] = node->child[1]; ++ ++ if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) ++ trie->n_entries--; ++ ++ rcu_assign_pointer(*slot, new_node); ++ kfree_rcu(node, rcu); ++ ++ goto out; ++ } ++ ++ /* If the new node matches the prefix completely, it must be inserted ++ * as an ancestor. Simply insert it between @node and *@slot. 
++ */ ++ if (matchlen == key->prefixlen) { ++ next_bit = extract_bit(node->data, matchlen); ++ rcu_assign_pointer(new_node->child[next_bit], node); ++ rcu_assign_pointer(*slot, new_node); ++ goto out; ++ } ++ ++ im_node = lpm_trie_node_alloc(trie, NULL); ++ if (!im_node) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ im_node->prefixlen = matchlen; ++ im_node->flags |= LPM_TREE_NODE_FLAG_IM; ++ memcpy(im_node->data, node->data, trie->data_size); ++ ++ /* Now determine which child to install in which slot */ ++ if (extract_bit(key->data, matchlen)) { ++ rcu_assign_pointer(im_node->child[0], node); ++ rcu_assign_pointer(im_node->child[1], new_node); ++ } else { ++ rcu_assign_pointer(im_node->child[0], new_node); ++ rcu_assign_pointer(im_node->child[1], node); ++ } ++ ++ /* Finally, assign the intermediate node to the determined spot */ ++ rcu_assign_pointer(*slot, im_node); ++ ++out: ++ if (ret) { ++ if (new_node) ++ trie->n_entries--; ++ ++ kfree(new_node); ++ kfree(im_node); ++ } ++ ++ raw_spin_unlock_irqrestore(&trie->lock, irq_flags); ++ ++ return ret; ++} ++ ++/* Called from syscall or from eBPF program */ ++static int trie_delete_elem(struct bpf_map *map, void *_key) ++{ ++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); ++ struct bpf_lpm_trie_key *key = _key; ++ struct lpm_trie_node __rcu **trim, **trim2; ++ struct lpm_trie_node *node, *parent; ++ unsigned long irq_flags; ++ unsigned int next_bit; ++ size_t matchlen = 0; ++ int ret = 0; ++ ++ if (key->prefixlen > trie->max_prefixlen) ++ return -EINVAL; ++ ++ raw_spin_lock_irqsave(&trie->lock, irq_flags); ++ ++ /* Walk the tree looking for an exact key/length match and keeping ++ * track of the path we traverse. We will need to know the node ++ * we wish to delete, and the slot that points to the node we want ++ * to delete. We may also need to know the nodes parent and the ++ * slot that contains it. ++ */ ++ trim = &trie->root; ++ trim2 = trim; ++ parent = NULL; ++ while ((node = rcu_dereference_protected( ++ *trim, lockdep_is_held(&trie->lock)))) { ++ matchlen = longest_prefix_match(trie, node, key); ++ ++ if (node->prefixlen != matchlen || ++ node->prefixlen == key->prefixlen) ++ break; ++ ++ parent = node; ++ trim2 = trim; ++ next_bit = extract_bit(key->data, node->prefixlen); ++ trim = &node->child[next_bit]; ++ } ++ ++ if (!node || node->prefixlen != key->prefixlen || ++ node->prefixlen != matchlen || ++ (node->flags & LPM_TREE_NODE_FLAG_IM)) { ++ ret = -ENOENT; ++ goto out; ++ } ++ ++ trie->n_entries--; ++ ++ /* If the node we are removing has two children, simply mark it ++ * as intermediate and we are done. ++ */ ++ if (rcu_access_pointer(node->child[0]) && ++ rcu_access_pointer(node->child[1])) { ++ node->flags |= LPM_TREE_NODE_FLAG_IM; ++ goto out; ++ } ++ ++ /* If the parent of the node we are about to delete is an intermediate ++ * node, and the deleted node doesn't have any children, we can delete ++ * the intermediate parent as well and promote its other child ++ * up the tree. Doing this maintains the invariant that all ++ * intermediate nodes have exactly 2 children and that there are no ++ * unnecessary intermediate nodes in the tree. 
++ */ ++ if (parent && (parent->flags & LPM_TREE_NODE_FLAG_IM) && ++ !node->child[0] && !node->child[1]) { ++ if (node == rcu_access_pointer(parent->child[0])) ++ rcu_assign_pointer( ++ *trim2, rcu_access_pointer(parent->child[1])); ++ else ++ rcu_assign_pointer( ++ *trim2, rcu_access_pointer(parent->child[0])); ++ kfree_rcu(parent, rcu); ++ kfree_rcu(node, rcu); ++ goto out; ++ } ++ ++ /* The node we are removing has either zero or one child. If there ++ * is a child, move it into the removed node's slot then delete ++ * the node. Otherwise just clear the slot and delete the node. ++ */ ++ if (node->child[0]) ++ rcu_assign_pointer(*trim, rcu_access_pointer(node->child[0])); ++ else if (node->child[1]) ++ rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1])); ++ else ++ RCU_INIT_POINTER(*trim, NULL); ++ kfree_rcu(node, rcu); ++ ++out: ++ raw_spin_unlock_irqrestore(&trie->lock, irq_flags); ++ ++ return ret; ++} ++ ++#define LPM_DATA_SIZE_MAX 256 ++#define LPM_DATA_SIZE_MIN 1 ++ ++#define LPM_VAL_SIZE_MAX (KMALLOC_MAX_SIZE - LPM_DATA_SIZE_MAX - \ ++ sizeof(struct lpm_trie_node)) ++#define LPM_VAL_SIZE_MIN 1 ++ ++#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key) + (X)) ++#define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX) ++#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) ++ ++#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \ ++ BPF_F_ACCESS_MASK) ++ ++static struct bpf_map *trie_alloc(union bpf_attr *attr) ++{ ++ struct lpm_trie *trie; ++ u64 cost = sizeof(*trie), cost_per_node; ++ int ret; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return ERR_PTR(-EPERM); ++ ++ /* check sanity of attributes */ ++ if (attr->max_entries == 0 || ++ !(attr->map_flags & BPF_F_NO_PREALLOC) || ++ attr->map_flags & ~LPM_CREATE_FLAG_MASK || ++ !bpf_map_flags_access_ok(attr->map_flags) || ++ attr->key_size < LPM_KEY_SIZE_MIN || ++ attr->key_size > LPM_KEY_SIZE_MAX || ++ attr->value_size < LPM_VAL_SIZE_MIN || ++ attr->value_size > LPM_VAL_SIZE_MAX) ++ return ERR_PTR(-EINVAL); ++ ++ trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN); ++ if (!trie) ++ return ERR_PTR(-ENOMEM); ++ ++ /* copy mandatory map attributes */ ++ bpf_map_init_from_attr(&trie->map, attr); ++ trie->data_size = attr->key_size - ++ offsetof(struct bpf_lpm_trie_key, data); ++ trie->max_prefixlen = trie->data_size * 8; ++ ++ cost_per_node = sizeof(struct lpm_trie_node) + ++ attr->value_size + trie->data_size; ++ cost += (u64) attr->max_entries * cost_per_node; ++ ++ ret = bpf_map_charge_init(&trie->map.memory, cost); ++ if (ret) ++ goto out_err; ++ ++ raw_spin_lock_init(&trie->lock); ++ ++ return &trie->map; ++out_err: ++ kfree(trie); ++ return ERR_PTR(ret); ++} ++ ++static void trie_free(struct bpf_map *map) ++{ ++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); ++ struct lpm_trie_node __rcu **slot; ++ struct lpm_trie_node *node; ++ ++ /* Wait for outstanding programs to complete ++ * update/lookup/delete/get_next_key and free the trie. ++ */ ++ synchronize_rcu(); ++ ++ /* Always start at the root and walk down to a node that has no ++ * children. Then free that node, nullify its reference in the parent ++ * and start over. 
++ */ ++ ++ for (;;) { ++ slot = &trie->root; ++ ++ for (;;) { ++ node = rcu_dereference_protected(*slot, 1); ++ if (!node) ++ goto out; ++ ++ if (rcu_access_pointer(node->child[0])) { ++ slot = &node->child[0]; ++ continue; ++ } ++ ++ if (rcu_access_pointer(node->child[1])) { ++ slot = &node->child[1]; ++ continue; ++ } ++ ++ kfree(node); ++ RCU_INIT_POINTER(*slot, NULL); ++ break; ++ } ++ } ++ ++out: ++ kfree(trie); ++} ++ ++static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) ++{ ++ struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root; ++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); ++ struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; ++ struct lpm_trie_node **node_stack = NULL; ++ int err = 0, stack_ptr = -1; ++ unsigned int next_bit; ++ size_t matchlen; ++ ++ /* The get_next_key follows postorder. For the 4 node example in ++ * the top of this file, the trie_get_next_key() returns the following ++ * one after another: ++ * 192.168.0.0/24 ++ * 192.168.1.0/24 ++ * 192.168.128.0/24 ++ * 192.168.0.0/16 ++ * ++ * The idea is to return more specific keys before less specific ones. ++ */ ++ ++ /* Empty trie */ ++ search_root = rcu_dereference(trie->root); ++ if (!search_root) ++ return -ENOENT; ++ ++ /* For invalid key, find the leftmost node in the trie */ ++ if (!key || key->prefixlen > trie->max_prefixlen) ++ goto find_leftmost; ++ ++ node_stack = kmalloc_array(trie->max_prefixlen, ++ sizeof(struct lpm_trie_node *), ++ GFP_ATOMIC | __GFP_NOWARN); ++ if (!node_stack) ++ return -ENOMEM; ++ ++ /* Try to find the exact node for the given key */ ++ for (node = search_root; node;) { ++ node_stack[++stack_ptr] = node; ++ matchlen = longest_prefix_match(trie, node, key); ++ if (node->prefixlen != matchlen || ++ node->prefixlen == key->prefixlen) ++ break; ++ ++ next_bit = extract_bit(key->data, node->prefixlen); ++ node = rcu_dereference(node->child[next_bit]); ++ } ++ if (!node || node->prefixlen != key->prefixlen || ++ (node->flags & LPM_TREE_NODE_FLAG_IM)) ++ goto find_leftmost; ++ ++ /* The node with the exactly-matching key has been found, ++ * find the first node in postorder after the matched node. ++ */ ++ node = node_stack[stack_ptr]; ++ while (stack_ptr > 0) { ++ parent = node_stack[stack_ptr - 1]; ++ if (rcu_dereference(parent->child[0]) == node) { ++ search_root = rcu_dereference(parent->child[1]); ++ if (search_root) ++ goto find_leftmost; ++ } ++ if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) { ++ next_node = parent; ++ goto do_copy; ++ } ++ ++ node = parent; ++ stack_ptr--; ++ } ++ ++ /* did not find anything */ ++ err = -ENOENT; ++ goto free_stack; ++ ++find_leftmost: ++ /* Find the leftmost non-intermediate node, all intermediate nodes ++ * have exact two children, so this function will never return NULL. ++ */ ++ for (node = search_root; node;) { ++ if (node->flags & LPM_TREE_NODE_FLAG_IM) { ++ node = rcu_dereference(node->child[0]); ++ } else { ++ next_node = node; ++ node = rcu_dereference(node->child[0]); ++ if (!node) ++ node = rcu_dereference(next_node->child[1]); ++ } ++ } ++do_copy: ++ next_key->prefixlen = next_node->prefixlen; ++ memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data), ++ next_node->data, trie->data_size); ++free_stack: ++ kfree(node_stack); ++ return err; ++} ++ ++static int trie_check_btf(const struct bpf_map *map, ++ const struct btf *btf, ++ const struct btf_type *key_type, ++ const struct btf_type *value_type) ++{ ++ /* Keys must have struct bpf_lpm_trie_key embedded. 
*/ ++ return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ? ++ -EINVAL : 0; ++} ++ ++const struct bpf_map_ops trie_map_ops = { ++ .map_alloc = trie_alloc, ++ .map_free = trie_free, ++ .map_get_next_key = trie_get_next_key, ++ .map_lookup_elem = trie_lookup_elem, ++ .map_update_elem = trie_update_elem, ++ .map_delete_elem = trie_delete_elem, ++ .map_check_btf = trie_check_btf, ++}; +--- a/kernel/bpf/Makefile ++++ b/kernel/bpf/Makefile +@@ -1,4 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0 + obj-y := core.o ++ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) ++# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details ++cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse ++endif ++CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) + +-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o +-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o ++obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o ++obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o ++obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ++obj-$(CONFIG_BPF_SYSCALL) += disasm.o ++obj-$(CONFIG_BPF_SYSCALL) += btf.o ++ifeq ($(CONFIG_NET),y) ++obj-$(CONFIG_BPF_SYSCALL) += devmap.o ++endif ++ifeq ($(CONFIG_PERF_EVENTS),y) ++obj-$(CONFIG_BPF_SYSCALL) += stackmap.o ++endif ++obj-$(CONFIG_CGROUP_BPF) += cgroup.o ++ifeq ($(CONFIG_SYSFS),y) ++obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o ++endif +--- /dev/null ++++ b/kernel/bpf/map_in_map.c +@@ -0,0 +1,120 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* Copyright (c) 2017 Facebook ++ */ ++#include ++#include ++ ++#include "map_in_map.h" ++ ++struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) ++{ ++ struct bpf_map *inner_map, *inner_map_meta; ++ u32 inner_map_meta_size; ++ struct fd f; ++ ++ f = fdget(inner_map_ufd); ++ inner_map = __bpf_map_get(f); ++ if (IS_ERR(inner_map)) ++ return inner_map; ++ ++ /* prog_array->owner_prog_type and owner_jited ++ * is a runtime binding. Doing static check alone ++ * in the verifier is not enough. ++ */ ++ if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY || ++ inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || ++ inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { ++ fdput(f); ++ return ERR_PTR(-ENOTSUPP); ++ } ++ ++ /* Does not support >1 level map-in-map */ ++ if (inner_map->inner_map_meta) { ++ fdput(f); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ if (map_value_has_spin_lock(inner_map)) { ++ fdput(f); ++ return ERR_PTR(-ENOTSUPP); ++ } ++ ++ inner_map_meta_size = sizeof(*inner_map_meta); ++ /* In some cases verifier needs to access beyond just base map. */ ++ if (inner_map->ops == &array_map_ops) ++ inner_map_meta_size = sizeof(struct bpf_array); ++ ++ inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); ++ if (!inner_map_meta) { ++ fdput(f); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ inner_map_meta->map_type = inner_map->map_type; ++ inner_map_meta->key_size = inner_map->key_size; ++ inner_map_meta->value_size = inner_map->value_size; ++ inner_map_meta->map_flags = inner_map->map_flags; ++ inner_map_meta->max_entries = inner_map->max_entries; ++ inner_map_meta->spin_lock_off = inner_map->spin_lock_off; ++ ++ /* Misc members not needed in bpf_map_meta_equal() check. 
*/ ++ inner_map_meta->ops = inner_map->ops; ++ if (inner_map->ops == &array_map_ops) { ++ inner_map_meta->unpriv_array = inner_map->unpriv_array; ++ container_of(inner_map_meta, struct bpf_array, map)->index_mask = ++ container_of(inner_map, struct bpf_array, map)->index_mask; ++ } ++ ++ fdput(f); ++ return inner_map_meta; ++} ++ ++void bpf_map_meta_free(struct bpf_map *map_meta) ++{ ++ kfree(map_meta); ++} ++ ++bool bpf_map_meta_equal(const struct bpf_map *meta0, ++ const struct bpf_map *meta1) ++{ ++ /* No need to compare ops because it is covered by map_type */ ++ return meta0->map_type == meta1->map_type && ++ meta0->key_size == meta1->key_size && ++ meta0->value_size == meta1->value_size && ++ meta0->map_flags == meta1->map_flags && ++ meta0->max_entries == meta1->max_entries; ++} ++ ++void *bpf_map_fd_get_ptr(struct bpf_map *map, ++ struct file *map_file /* not used */, ++ int ufd) ++{ ++ struct bpf_map *inner_map; ++ struct fd f; ++ ++ f = fdget(ufd); ++ inner_map = __bpf_map_get(f); ++ if (IS_ERR(inner_map)) ++ return inner_map; ++ ++ if (bpf_map_meta_equal(map->inner_map_meta, inner_map)) ++ inner_map = bpf_map_inc(inner_map, false); ++ else ++ inner_map = ERR_PTR(-EINVAL); ++ ++ fdput(f); ++ return inner_map; ++} ++ ++void bpf_map_fd_put_ptr(void *ptr) ++{ ++ /* ptr->ops->map_free() has to go through one ++ * rcu grace period by itself. ++ */ ++ bpf_map_put(ptr); ++} ++ ++u32 bpf_map_fd_sys_lookup_elem(void *ptr) ++{ ++ return ((struct bpf_map *)ptr)->id; ++} +--- /dev/null ++++ b/kernel/bpf/map_in_map.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Copyright (c) 2017 Facebook ++ */ ++#ifndef __MAP_IN_MAP_H__ ++#define __MAP_IN_MAP_H__ ++ ++#include ++ ++struct file; ++struct bpf_map; ++ ++struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd); ++void bpf_map_meta_free(struct bpf_map *map_meta); ++bool bpf_map_meta_equal(const struct bpf_map *meta0, ++ const struct bpf_map *meta1); ++void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file, ++ int ufd); ++void bpf_map_fd_put_ptr(void *ptr); ++u32 bpf_map_fd_sys_lookup_elem(void *ptr); ++ ++#endif +--- /dev/null ++++ b/kernel/bpf/percpu_freelist.c +@@ -0,0 +1,118 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* Copyright (c) 2016 Facebook ++ */ ++#include "percpu_freelist.h" ++ ++int pcpu_freelist_init(struct pcpu_freelist *s) ++{ ++ int cpu; ++ ++ s->freelist = alloc_percpu(struct pcpu_freelist_head); ++ if (!s->freelist) ++ return -ENOMEM; ++ ++ for_each_possible_cpu(cpu) { ++ struct pcpu_freelist_head *head = per_cpu_ptr(s->freelist, cpu); ++ ++ raw_spin_lock_init(&head->lock); ++ head->first = NULL; ++ } ++ return 0; ++} ++ ++void pcpu_freelist_destroy(struct pcpu_freelist *s) ++{ ++ free_percpu(s->freelist); ++} ++ ++static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, ++ struct pcpu_freelist_node *node) ++{ ++ raw_spin_lock(&head->lock); ++ node->next = head->first; ++ head->first = node; ++ raw_spin_unlock(&head->lock); ++} ++ ++void __pcpu_freelist_push(struct pcpu_freelist *s, ++ struct pcpu_freelist_node *node) ++{ ++ struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); ++ ++ ___pcpu_freelist_push(head, node); ++} ++ ++void pcpu_freelist_push(struct pcpu_freelist *s, ++ struct pcpu_freelist_node *node) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ __pcpu_freelist_push(s, node); ++ local_irq_restore(flags); ++} ++ ++void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, ++ u32 nr_elems) ++{ ++ struct pcpu_freelist_head 
*head; ++ unsigned long flags; ++ int i, cpu, pcpu_entries; ++ ++ pcpu_entries = nr_elems / num_possible_cpus() + 1; ++ i = 0; ++ ++ /* disable irq to workaround lockdep false positive ++ * in bpf usage pcpu_freelist_populate() will never race ++ * with pcpu_freelist_push() ++ */ ++ local_irq_save(flags); ++ for_each_possible_cpu(cpu) { ++again: ++ head = per_cpu_ptr(s->freelist, cpu); ++ ___pcpu_freelist_push(head, buf); ++ i++; ++ buf += elem_size; ++ if (i == nr_elems) ++ break; ++ if (i % pcpu_entries) ++ goto again; ++ } ++ local_irq_restore(flags); ++} ++ ++struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) ++{ ++ struct pcpu_freelist_head *head; ++ struct pcpu_freelist_node *node; ++ int orig_cpu, cpu; ++ ++ orig_cpu = cpu = raw_smp_processor_id(); ++ while (1) { ++ head = per_cpu_ptr(s->freelist, cpu); ++ raw_spin_lock(&head->lock); ++ node = head->first; ++ if (node) { ++ head->first = node->next; ++ raw_spin_unlock(&head->lock); ++ return node; ++ } ++ raw_spin_unlock(&head->lock); ++ cpu = cpumask_next(cpu, cpu_possible_mask); ++ if (cpu >= nr_cpu_ids) ++ cpu = 0; ++ if (cpu == orig_cpu) ++ return NULL; ++ } ++} ++ ++struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) ++{ ++ struct pcpu_freelist_node *ret; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ ret = __pcpu_freelist_pop(s); ++ local_irq_restore(flags); ++ return ret; ++} +--- /dev/null ++++ b/kernel/bpf/percpu_freelist.h +@@ -0,0 +1,32 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Copyright (c) 2016 Facebook ++ */ ++#ifndef __PERCPU_FREELIST_H__ ++#define __PERCPU_FREELIST_H__ ++#include ++#include ++ ++struct pcpu_freelist_head { ++ struct pcpu_freelist_node *first; ++ raw_spinlock_t lock; ++}; ++ ++struct pcpu_freelist { ++ struct pcpu_freelist_head __percpu *freelist; ++}; ++ ++struct pcpu_freelist_node { ++ struct pcpu_freelist_node *next; ++}; ++ ++/* pcpu_freelist_* do spin_lock_irqsave. */ ++void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); ++struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); ++/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. 
*/ ++void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); ++struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *); ++void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, ++ u32 nr_elems); ++int pcpu_freelist_init(struct pcpu_freelist *); ++void pcpu_freelist_destroy(struct pcpu_freelist *s); ++#endif +--- /dev/null ++++ b/kernel/bpf/queue_stack_maps.c +@@ -0,0 +1,289 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * queue_stack_maps.c: BPF queue and stack maps ++ * ++ * Copyright (c) 2018 Politecnico di Torino ++ */ ++#include ++#include ++#include ++#include ++#include "percpu_freelist.h" ++ ++#define QUEUE_STACK_CREATE_FLAG_MASK \ ++ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) ++ ++struct bpf_queue_stack { ++ struct bpf_map map; ++ raw_spinlock_t lock; ++ u32 head, tail; ++ u32 size; /* max_entries + 1 */ ++ ++ char elements[0] __aligned(8); ++}; ++ ++static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map) ++{ ++ return container_of(map, struct bpf_queue_stack, map); ++} ++ ++static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs) ++{ ++ return qs->head == qs->tail; ++} ++ ++static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) ++{ ++ u32 head = qs->head + 1; ++ ++ if (unlikely(head >= qs->size)) ++ head = 0; ++ ++ return head == qs->tail; ++} ++ ++/* Called from syscall */ ++static int queue_stack_map_alloc_check(union bpf_attr *attr) ++{ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ /* check sanity of attributes */ ++ if (attr->max_entries == 0 || attr->key_size != 0 || ++ attr->value_size == 0 || ++ attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK || ++ !bpf_map_flags_access_ok(attr->map_flags)) ++ return -EINVAL; ++ ++ if (attr->value_size > KMALLOC_MAX_SIZE) ++ /* if value_size is bigger, the user space won't be able to ++ * access the elements. ++ */ ++ return -E2BIG; ++ ++ return 0; ++} ++ ++static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) ++{ ++ int ret, numa_node = bpf_map_attr_numa_node(attr); ++ struct bpf_map_memory mem = {0}; ++ struct bpf_queue_stack *qs; ++ u64 size, queue_size, cost; ++ ++ size = (u64) attr->max_entries + 1; ++ cost = queue_size = sizeof(*qs) + size * attr->value_size; ++ ++ ret = bpf_map_charge_init(&mem, cost); ++ if (ret < 0) ++ return ERR_PTR(ret); ++ ++ qs = bpf_map_area_alloc(queue_size, numa_node); ++ if (!qs) { ++ bpf_map_charge_finish(&mem); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ memset(qs, 0, sizeof(*qs)); ++ ++ bpf_map_init_from_attr(&qs->map, attr); ++ ++ bpf_map_charge_move(&qs->map.memory, &mem); ++ qs->size = size; ++ ++ raw_spin_lock_init(&qs->lock); ++ ++ return &qs->map; ++} ++ ++/* Called when map->refcnt goes to zero, either from workqueue or from syscall */ ++static void queue_stack_map_free(struct bpf_map *map) ++{ ++ struct bpf_queue_stack *qs = bpf_queue_stack(map); ++ ++ /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, ++ * so the programs (can be more than one that used this map) were ++ * disconnected from events. 
Wait for outstanding critical sections in ++ * these programs to complete ++ */ ++ synchronize_rcu(); ++ ++ bpf_map_area_free(qs); ++} ++ ++static int __queue_map_get(struct bpf_map *map, void *value, bool delete) ++{ ++ struct bpf_queue_stack *qs = bpf_queue_stack(map); ++ unsigned long flags; ++ int err = 0; ++ void *ptr; ++ ++ raw_spin_lock_irqsave(&qs->lock, flags); ++ ++ if (queue_stack_map_is_empty(qs)) { ++ memset(value, 0, qs->map.value_size); ++ err = -ENOENT; ++ goto out; ++ } ++ ++ ptr = &qs->elements[qs->tail * qs->map.value_size]; ++ memcpy(value, ptr, qs->map.value_size); ++ ++ if (delete) { ++ if (unlikely(++qs->tail >= qs->size)) ++ qs->tail = 0; ++ } ++ ++out: ++ raw_spin_unlock_irqrestore(&qs->lock, flags); ++ return err; ++} ++ ++ ++static int __stack_map_get(struct bpf_map *map, void *value, bool delete) ++{ ++ struct bpf_queue_stack *qs = bpf_queue_stack(map); ++ unsigned long flags; ++ int err = 0; ++ void *ptr; ++ u32 index; ++ ++ raw_spin_lock_irqsave(&qs->lock, flags); ++ ++ if (queue_stack_map_is_empty(qs)) { ++ memset(value, 0, qs->map.value_size); ++ err = -ENOENT; ++ goto out; ++ } ++ ++ index = qs->head - 1; ++ if (unlikely(index >= qs->size)) ++ index = qs->size - 1; ++ ++ ptr = &qs->elements[index * qs->map.value_size]; ++ memcpy(value, ptr, qs->map.value_size); ++ ++ if (delete) ++ qs->head = index; ++ ++out: ++ raw_spin_unlock_irqrestore(&qs->lock, flags); ++ return err; ++} ++ ++/* Called from syscall or from eBPF program */ ++static int queue_map_peek_elem(struct bpf_map *map, void *value) ++{ ++ return __queue_map_get(map, value, false); ++} ++ ++/* Called from syscall or from eBPF program */ ++static int stack_map_peek_elem(struct bpf_map *map, void *value) ++{ ++ return __stack_map_get(map, value, false); ++} ++ ++/* Called from syscall or from eBPF program */ ++static int queue_map_pop_elem(struct bpf_map *map, void *value) ++{ ++ return __queue_map_get(map, value, true); ++} ++ ++/* Called from syscall or from eBPF program */ ++static int stack_map_pop_elem(struct bpf_map *map, void *value) ++{ ++ return __stack_map_get(map, value, true); ++} ++ ++/* Called from syscall or from eBPF program */ ++static int queue_stack_map_push_elem(struct bpf_map *map, void *value, ++ u64 flags) ++{ ++ struct bpf_queue_stack *qs = bpf_queue_stack(map); ++ unsigned long irq_flags; ++ int err = 0; ++ void *dst; ++ ++ /* BPF_EXIST is used to force making room for a new element in case the ++ * map is full ++ */ ++ bool replace = (flags & BPF_EXIST); ++ ++ /* Check supported flags for queue and stack maps */ ++ if (flags & BPF_NOEXIST || flags > BPF_EXIST) ++ return -EINVAL; ++ ++ raw_spin_lock_irqsave(&qs->lock, irq_flags); ++ ++ if (queue_stack_map_is_full(qs)) { ++ if (!replace) { ++ err = -E2BIG; ++ goto out; ++ } ++ /* advance tail pointer to overwrite oldest element */ ++ if (unlikely(++qs->tail >= qs->size)) ++ qs->tail = 0; ++ } ++ ++ dst = &qs->elements[qs->head * qs->map.value_size]; ++ memcpy(dst, value, qs->map.value_size); ++ ++ if (unlikely(++qs->head >= qs->size)) ++ qs->head = 0; ++ ++out: ++ raw_spin_unlock_irqrestore(&qs->lock, irq_flags); ++ return err; ++} ++ ++/* Called from syscall or from eBPF program */ ++static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ return NULL; ++} ++ ++/* Called from syscall or from eBPF program */ ++static int queue_stack_map_update_elem(struct bpf_map *map, void *key, ++ void *value, u64 flags) ++{ ++ return -EINVAL; ++} ++ ++/* Called from syscall or from eBPF program */ ++static int 
queue_stack_map_delete_elem(struct bpf_map *map, void *key) ++{ ++ return -EINVAL; ++} ++ ++/* Called from syscall */ ++static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, ++ void *next_key) ++{ ++ return -EINVAL; ++} ++ ++const struct bpf_map_ops queue_map_ops = { ++ .map_alloc_check = queue_stack_map_alloc_check, ++ .map_alloc = queue_stack_map_alloc, ++ .map_free = queue_stack_map_free, ++ .map_lookup_elem = queue_stack_map_lookup_elem, ++ .map_update_elem = queue_stack_map_update_elem, ++ .map_delete_elem = queue_stack_map_delete_elem, ++ .map_push_elem = queue_stack_map_push_elem, ++ .map_pop_elem = queue_map_pop_elem, ++ .map_peek_elem = queue_map_peek_elem, ++ .map_get_next_key = queue_stack_map_get_next_key, ++}; ++ ++const struct bpf_map_ops stack_map_ops = { ++ .map_alloc_check = queue_stack_map_alloc_check, ++ .map_alloc = queue_stack_map_alloc, ++ .map_free = queue_stack_map_free, ++ .map_lookup_elem = queue_stack_map_lookup_elem, ++ .map_update_elem = queue_stack_map_update_elem, ++ .map_delete_elem = queue_stack_map_delete_elem, ++ .map_push_elem = queue_stack_map_push_elem, ++ .map_pop_elem = stack_map_pop_elem, ++ .map_peek_elem = stack_map_peek_elem, ++ .map_get_next_key = queue_stack_map_get_next_key, ++}; +--- /dev/null ++++ b/kernel/bpf/stackmap.c +@@ -0,0 +1,634 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* Copyright (c) 2016 Facebook ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "percpu_freelist.h" ++ ++#define STACK_CREATE_FLAG_MASK \ ++ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ ++ BPF_F_STACK_BUILD_ID) ++ ++struct stack_map_bucket { ++ struct pcpu_freelist_node fnode; ++ u32 hash; ++ u32 nr; ++ u64 data[]; ++}; ++ ++struct bpf_stack_map { ++ struct bpf_map map; ++ void *elems; ++ struct pcpu_freelist freelist; ++ u32 n_buckets; ++ struct stack_map_bucket *buckets[]; ++}; ++ ++/* irq_work to run up_read() for build_id lookup in nmi context */ ++struct stack_map_irq_work { ++ struct irq_work irq_work; ++ struct rw_semaphore *sem; ++}; ++ ++static void do_up_read(struct irq_work *entry) ++{ ++ struct stack_map_irq_work *work; ++ ++ work = container_of(entry, struct stack_map_irq_work, irq_work); ++ up_read_non_owner(work->sem); ++ work->sem = NULL; ++} ++ ++static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work); ++ ++static inline bool stack_map_use_build_id(struct bpf_map *map) ++{ ++ return (map->map_flags & BPF_F_STACK_BUILD_ID); ++} ++ ++static inline int stack_map_data_size(struct bpf_map *map) ++{ ++ return stack_map_use_build_id(map) ? 
++ sizeof(struct bpf_stack_build_id) : sizeof(u64); ++} ++ ++static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) ++{ ++ u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; ++ int err; ++ ++ smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, ++ smap->map.numa_node); ++ if (!smap->elems) ++ return -ENOMEM; ++ ++ err = pcpu_freelist_init(&smap->freelist); ++ if (err) ++ goto free_elems; ++ ++ pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, ++ smap->map.max_entries); ++ return 0; ++ ++free_elems: ++ bpf_map_area_free(smap->elems); ++ return err; ++} ++ ++/* Called from syscall */ ++static struct bpf_map *stack_map_alloc(union bpf_attr *attr) ++{ ++ u32 value_size = attr->value_size; ++ struct bpf_stack_map *smap; ++ struct bpf_map_memory mem; ++ u64 cost, n_buckets; ++ int err; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return ERR_PTR(-EPERM); ++ ++ if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) ++ return ERR_PTR(-EINVAL); ++ ++ /* check sanity of attributes */ ++ if (attr->max_entries == 0 || attr->key_size != 4 || ++ value_size < 8 || value_size % 8) ++ return ERR_PTR(-EINVAL); ++ ++ BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); ++ if (attr->map_flags & BPF_F_STACK_BUILD_ID) { ++ if (value_size % sizeof(struct bpf_stack_build_id) || ++ value_size / sizeof(struct bpf_stack_build_id) ++ > sysctl_perf_event_max_stack) ++ return ERR_PTR(-EINVAL); ++ } else if (value_size / 8 > sysctl_perf_event_max_stack) ++ return ERR_PTR(-EINVAL); ++ ++ /* hash table size must be power of 2 */ ++ n_buckets = roundup_pow_of_two(attr->max_entries); ++ if (!n_buckets) ++ return ERR_PTR(-E2BIG); ++ ++ cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); ++ cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); ++ err = bpf_map_charge_init(&mem, cost); ++ if (err) ++ return ERR_PTR(err); ++ ++ smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); ++ if (!smap) { ++ bpf_map_charge_finish(&mem); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ bpf_map_init_from_attr(&smap->map, attr); ++ smap->map.value_size = value_size; ++ smap->n_buckets = n_buckets; ++ ++ err = get_callchain_buffers(sysctl_perf_event_max_stack); ++ if (err) ++ goto free_charge; ++ ++ err = prealloc_elems_and_freelist(smap); ++ if (err) ++ goto put_buffers; ++ ++ bpf_map_charge_move(&smap->map.memory, &mem); ++ ++ return &smap->map; ++ ++put_buffers: ++ put_callchain_buffers(); ++free_charge: ++ bpf_map_charge_finish(&mem); ++ bpf_map_area_free(smap); ++ return ERR_PTR(err); ++} ++ ++#define BPF_BUILD_ID 3 ++/* ++ * Parse build id from the note segment. This logic can be shared between ++ * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are ++ * identical. 
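For comparison, the same note walk can be done from plain userspace against a PT_NOTE segment that has already been read into memory. The sketch below is illustrative only and not part of the patch; it assumes the standard <elf.h> definitions (NT_GNU_BUILD_ID carries the same type value 3 that the patch names BPF_BUILD_ID), and parse_gnu_build_id is an invented name.

/* Illustrative userspace sketch: scan an in-memory ELF note buffer and
 * copy out the GNU build ID.  The caller is assumed to have read a
 * PT_NOTE segment into note/note_size already.
 */
#include <elf.h>
#include <stddef.h>
#include <string.h>

static int parse_gnu_build_id(const void *note, size_t note_size,
			      unsigned char *build_id, size_t id_max)
{
	size_t off = 0;

	while (off + sizeof(Elf32_Nhdr) < note_size) {
		const Elf32_Nhdr *nhdr = (const Elf32_Nhdr *)((const char *)note + off);
		size_t name_off = off + sizeof(*nhdr);
		size_t desc_off = name_off + ((nhdr->n_namesz + 3) & ~3u);

		if (desc_off + nhdr->n_descsz > note_size)
			return -1;

		/* the note layout is identical for 32-bit and 64-bit ELF */
		if (nhdr->n_type == NT_GNU_BUILD_ID &&
		    nhdr->n_namesz == sizeof("GNU") &&
		    memcmp((const char *)note + name_off, "GNU", sizeof("GNU")) == 0 &&
		    nhdr->n_descsz > 0 && nhdr->n_descsz <= id_max) {
			memcpy(build_id, (const char *)note + desc_off, nhdr->n_descsz);
			return (int)nhdr->n_descsz;
		}

		off = desc_off + ((nhdr->n_descsz + 3) & ~3u);
	}
	return -1;
}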
++ */ ++static inline int stack_map_parse_build_id(void *page_addr, ++ unsigned char *build_id, ++ void *note_start, ++ Elf32_Word note_size) ++{ ++ Elf32_Word note_offs = 0, new_offs; ++ ++ /* check for overflow */ ++ if (note_start < page_addr || note_start + note_size < note_start) ++ return -EINVAL; ++ ++ /* only supports note that fits in the first page */ ++ if (note_start + note_size > page_addr + PAGE_SIZE) ++ return -EINVAL; ++ ++ while (note_offs + sizeof(Elf32_Nhdr) < note_size) { ++ Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); ++ ++ if (nhdr->n_type == BPF_BUILD_ID && ++ nhdr->n_namesz == sizeof("GNU") && ++ nhdr->n_descsz > 0 && ++ nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { ++ memcpy(build_id, ++ note_start + note_offs + ++ ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), ++ nhdr->n_descsz); ++ memset(build_id + nhdr->n_descsz, 0, ++ BPF_BUILD_ID_SIZE - nhdr->n_descsz); ++ return 0; ++ } ++ new_offs = note_offs + sizeof(Elf32_Nhdr) + ++ ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); ++ if (new_offs <= note_offs) /* overflow */ ++ break; ++ note_offs = new_offs; ++ } ++ return -EINVAL; ++} ++ ++/* Parse build ID from 32-bit ELF */ ++static int stack_map_get_build_id_32(void *page_addr, ++ unsigned char *build_id) ++{ ++ Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr; ++ Elf32_Phdr *phdr; ++ int i; ++ ++ /* only supports phdr that fits in one page */ ++ if (ehdr->e_phnum > ++ (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) ++ return -EINVAL; ++ ++ phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr)); ++ ++ for (i = 0; i < ehdr->e_phnum; ++i) ++ if (phdr[i].p_type == PT_NOTE) ++ return stack_map_parse_build_id(page_addr, build_id, ++ page_addr + phdr[i].p_offset, ++ phdr[i].p_filesz); ++ return -EINVAL; ++} ++ ++/* Parse build ID from 64-bit ELF */ ++static int stack_map_get_build_id_64(void *page_addr, ++ unsigned char *build_id) ++{ ++ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr; ++ Elf64_Phdr *phdr; ++ int i; ++ ++ /* only supports phdr that fits in one page */ ++ if (ehdr->e_phnum > ++ (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) ++ return -EINVAL; ++ ++ phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr)); ++ ++ for (i = 0; i < ehdr->e_phnum; ++i) ++ if (phdr[i].p_type == PT_NOTE) ++ return stack_map_parse_build_id(page_addr, build_id, ++ page_addr + phdr[i].p_offset, ++ phdr[i].p_filesz); ++ return -EINVAL; ++} ++ ++/* Parse build ID of ELF file mapped to vma */ ++static int stack_map_get_build_id(struct vm_area_struct *vma, ++ unsigned char *build_id) ++{ ++ Elf32_Ehdr *ehdr; ++ struct page *page; ++ void *page_addr; ++ int ret; ++ ++ /* only works for page backed storage */ ++ if (!vma->vm_file) ++ return -EINVAL; ++ ++ page = find_get_page(vma->vm_file->f_mapping, 0); ++ if (!page) ++ return -EFAULT; /* page not mapped */ ++ ++ ret = -EINVAL; ++ page_addr = kmap_atomic(page); ++ ehdr = (Elf32_Ehdr *)page_addr; ++ ++ /* compare magic x7f "ELF" */ ++ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) ++ goto out; ++ ++ /* only support executable file and shared object file */ ++ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ++ goto out; ++ ++ if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) ++ ret = stack_map_get_build_id_32(page_addr, build_id); ++ else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ++ ret = stack_map_get_build_id_64(page_addr, build_id); ++out: ++ kunmap_atomic(page_addr); ++ put_page(page); ++ return ret; ++} ++ ++static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, ++ u64 *ips, u32 trace_nr, bool user) 
++{ ++ int i; ++ struct vm_area_struct *vma; ++ bool irq_work_busy = false; ++ struct stack_map_irq_work *work = NULL; ++ ++ if (irqs_disabled()) { ++ work = this_cpu_ptr(&up_read_work); ++ if (work->irq_work.flags & IRQ_WORK_BUSY) ++ /* cannot queue more up_read, fallback */ ++ irq_work_busy = true; ++ } ++ ++ /* ++ * We cannot do up_read() when the irq is disabled, because of ++ * risk to deadlock with rq_lock. To do build_id lookup when the ++ * irqs are disabled, we need to run up_read() in irq_work. We use ++ * a percpu variable to do the irq_work. If the irq_work is ++ * already used by another lookup, we fall back to report ips. ++ * ++ * Same fallback is used for kernel stack (!user) on a stackmap ++ * with build_id. ++ */ ++ if (!user || !current || !current->mm || irq_work_busy || ++ down_read_trylock(¤t->mm->mmap_sem) == 0) { ++ /* cannot access current->mm, fall back to ips */ ++ for (i = 0; i < trace_nr; i++) { ++ id_offs[i].status = BPF_STACK_BUILD_ID_IP; ++ id_offs[i].ip = ips[i]; ++ memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); ++ } ++ return; ++ } ++ ++ for (i = 0; i < trace_nr; i++) { ++ vma = find_vma(current->mm, ips[i]); ++ if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) { ++ /* per entry fall back to ips */ ++ id_offs[i].status = BPF_STACK_BUILD_ID_IP; ++ id_offs[i].ip = ips[i]; ++ memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); ++ continue; ++ } ++ id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] ++ - vma->vm_start; ++ id_offs[i].status = BPF_STACK_BUILD_ID_VALID; ++ } ++ ++ if (!work) { ++ up_read(¤t->mm->mmap_sem); ++ } else { ++ work->sem = ¤t->mm->mmap_sem; ++ irq_work_queue(&work->irq_work); ++ /* ++ * The irq_work will release the mmap_sem with ++ * up_read_non_owner(). The rwsem_release() is called ++ * here to release the lock from lockdep's perspective. 
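On the consumer side, the fallback coded above is visible per entry through the status field of struct bpf_stack_build_id: frames that could not be resolved carry BPF_STACK_BUILD_ID_IP plus the raw instruction pointer instead of a build-id/offset pair. A hedged userspace sketch (not part of the patch; it assumes a <linux/bpf.h> that exports the structure and the status constants, and print_frame is an invented name):

/* Illustrative sketch: interpret one frame of a BPF_F_STACK_BUILD_ID
 * stack map value after it has been read out of the map.
 */
#include <linux/bpf.h>
#include <stdio.h>

static void print_frame(const struct bpf_stack_build_id *e)
{
	switch (e->status) {
	case BPF_STACK_BUILD_ID_VALID:
		/* build_id[] plus file offset is stable across ASLR */
		printf("build-id frame, file offset 0x%llx\n",
		       (unsigned long long)e->offset);
		break;
	case BPF_STACK_BUILD_ID_IP:
		/* the fallback path above stored only the raw ip */
		printf("raw ip 0x%llx (build id unavailable)\n",
		       (unsigned long long)e->ip);
		break;
	default:
		printf("empty or unknown frame, status %d\n", e->status);
	}
}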
++ */ ++ rwsem_release(¤t->mm->mmap_sem.dep_map, 1, _RET_IP_); ++ } ++} ++ ++BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, ++ u64, flags) ++{ ++ struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); ++ struct perf_callchain_entry *trace; ++ struct stack_map_bucket *bucket, *new_bucket, *old_bucket; ++ u32 max_depth = map->value_size / stack_map_data_size(map); ++ /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ ++ u32 init_nr = sysctl_perf_event_max_stack - max_depth; ++ u32 skip = flags & BPF_F_SKIP_FIELD_MASK; ++ u32 hash, id, trace_nr, trace_len; ++ bool user = flags & BPF_F_USER_STACK; ++ bool kernel = !user; ++ u64 *ips; ++ bool hash_matches; ++ ++ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | ++ BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) ++ return -EINVAL; ++ ++ trace = get_perf_callchain(regs, init_nr, kernel, user, ++ sysctl_perf_event_max_stack, false, false); ++ ++ if (unlikely(!trace)) ++ /* couldn't fetch the stack trace */ ++ return -EFAULT; ++ ++ /* get_perf_callchain() guarantees that trace->nr >= init_nr ++ * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth ++ */ ++ trace_nr = trace->nr - init_nr; ++ ++ if (trace_nr <= skip) ++ /* skipping more than usable stack trace */ ++ return -EFAULT; ++ ++ trace_nr -= skip; ++ trace_len = trace_nr * sizeof(u64); ++ ips = trace->ip + skip + init_nr; ++ hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); ++ id = hash & (smap->n_buckets - 1); ++ bucket = READ_ONCE(smap->buckets[id]); ++ ++ hash_matches = bucket && bucket->hash == hash; ++ /* fast cmp */ ++ if (hash_matches && flags & BPF_F_FAST_STACK_CMP) ++ return id; ++ ++ if (stack_map_use_build_id(map)) { ++ /* for build_id+offset, pop a bucket before slow cmp */ ++ new_bucket = (struct stack_map_bucket *) ++ pcpu_freelist_pop(&smap->freelist); ++ if (unlikely(!new_bucket)) ++ return -ENOMEM; ++ new_bucket->nr = trace_nr; ++ stack_map_get_build_id_offset( ++ (struct bpf_stack_build_id *)new_bucket->data, ++ ips, trace_nr, user); ++ trace_len = trace_nr * sizeof(struct bpf_stack_build_id); ++ if (hash_matches && bucket->nr == trace_nr && ++ memcmp(bucket->data, new_bucket->data, trace_len) == 0) { ++ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); ++ return id; ++ } ++ if (bucket && !(flags & BPF_F_REUSE_STACKID)) { ++ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); ++ return -EEXIST; ++ } ++ } else { ++ if (hash_matches && bucket->nr == trace_nr && ++ memcmp(bucket->data, ips, trace_len) == 0) ++ return id; ++ if (bucket && !(flags & BPF_F_REUSE_STACKID)) ++ return -EEXIST; ++ ++ new_bucket = (struct stack_map_bucket *) ++ pcpu_freelist_pop(&smap->freelist); ++ if (unlikely(!new_bucket)) ++ return -ENOMEM; ++ memcpy(new_bucket->data, ips, trace_len); ++ } ++ ++ new_bucket->hash = hash; ++ new_bucket->nr = trace_nr; ++ ++ old_bucket = xchg(&smap->buckets[id], new_bucket); ++ if (old_bucket) ++ pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); ++ return id; ++} ++ ++const struct bpf_func_proto bpf_get_stackid_proto = { ++ .func = bpf_get_stackid, ++ .gpl_only = true, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_CONST_MAP_PTR, ++ .arg3_type = ARG_ANYTHING, ++}; ++ ++BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, ++ u64, flags) ++{ ++ u32 init_nr, trace_nr, copy_len, elem_size, num_elem; ++ bool user_build_id = flags & BPF_F_USER_BUILD_ID; ++ u32 skip = flags & 
BPF_F_SKIP_FIELD_MASK; ++ bool user = flags & BPF_F_USER_STACK; ++ struct perf_callchain_entry *trace; ++ bool kernel = !user; ++ int err = -EINVAL; ++ u64 *ips; ++ ++ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | ++ BPF_F_USER_BUILD_ID))) ++ goto clear; ++ if (kernel && user_build_id) ++ goto clear; ++ ++ elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) ++ : sizeof(u64); ++ if (unlikely(size % elem_size)) ++ goto clear; ++ ++ num_elem = size / elem_size; ++ if (sysctl_perf_event_max_stack < num_elem) ++ init_nr = 0; ++ else ++ init_nr = sysctl_perf_event_max_stack - num_elem; ++ trace = get_perf_callchain(regs, init_nr, kernel, user, ++ sysctl_perf_event_max_stack, false, false); ++ if (unlikely(!trace)) ++ goto err_fault; ++ ++ trace_nr = trace->nr - init_nr; ++ if (trace_nr < skip) ++ goto err_fault; ++ ++ trace_nr -= skip; ++ trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; ++ copy_len = trace_nr * elem_size; ++ ips = trace->ip + skip + init_nr; ++ if (user && user_build_id) ++ stack_map_get_build_id_offset(buf, ips, trace_nr, user); ++ else ++ memcpy(buf, ips, copy_len); ++ ++ if (size > copy_len) ++ memset(buf + copy_len, 0, size - copy_len); ++ return copy_len; ++ ++err_fault: ++ err = -EFAULT; ++clear: ++ memset(buf, 0, size); ++ return err; ++} ++ ++const struct bpf_func_proto bpf_get_stack_proto = { ++ .func = bpf_get_stack, ++ .gpl_only = true, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_PTR_TO_UNINIT_MEM, ++ .arg3_type = ARG_CONST_SIZE_OR_ZERO, ++ .arg4_type = ARG_ANYTHING, ++}; ++ ++/* Called from eBPF program */ ++static void *stack_map_lookup_elem(struct bpf_map *map, void *key) ++{ ++ return ERR_PTR(-EOPNOTSUPP); ++} ++ ++/* Called from syscall */ ++int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) ++{ ++ struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); ++ struct stack_map_bucket *bucket, *old_bucket; ++ u32 id = *(u32 *)key, trace_len; ++ ++ if (unlikely(id >= smap->n_buckets)) ++ return -ENOENT; ++ ++ bucket = xchg(&smap->buckets[id], NULL); ++ if (!bucket) ++ return -ENOENT; ++ ++ trace_len = bucket->nr * stack_map_data_size(map); ++ memcpy(value, bucket->data, trace_len); ++ memset(value + trace_len, 0, map->value_size - trace_len); ++ ++ old_bucket = xchg(&smap->buckets[id], bucket); ++ if (old_bucket) ++ pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); ++ return 0; ++} ++ ++static int stack_map_get_next_key(struct bpf_map *map, void *key, ++ void *next_key) ++{ ++ struct bpf_stack_map *smap = container_of(map, ++ struct bpf_stack_map, map); ++ u32 id; ++ ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++ ++ if (!key) { ++ id = 0; ++ } else { ++ id = *(u32 *)key; ++ if (id >= smap->n_buckets || !smap->buckets[id]) ++ id = 0; ++ else ++ id++; ++ } ++ ++ while (id < smap->n_buckets && !smap->buckets[id]) ++ id++; ++ ++ if (id >= smap->n_buckets) ++ return -ENOENT; ++ ++ *(u32 *)next_key = id; ++ return 0; ++} ++ ++static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, ++ u64 map_flags) ++{ ++ return -EINVAL; ++} ++ ++/* Called from syscall or from eBPF program */ ++static int stack_map_delete_elem(struct bpf_map *map, void *key) ++{ ++ struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); ++ struct stack_map_bucket *old_bucket; ++ u32 id = *(u32 *)key; ++ ++ if (unlikely(id >= smap->n_buckets)) ++ return -E2BIG; ++ ++ old_bucket = xchg(&smap->buckets[id], NULL); ++ if (old_bucket) { ++ 
pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); ++ return 0; ++ } else { ++ return -ENOENT; ++ } ++} ++ ++/* Called when map->refcnt goes to zero, either from workqueue or from syscall */ ++static void stack_map_free(struct bpf_map *map) ++{ ++ struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); ++ ++ /* wait for bpf programs to complete before freeing stack map */ ++ synchronize_rcu(); ++ ++ bpf_map_area_free(smap->elems); ++ pcpu_freelist_destroy(&smap->freelist); ++ bpf_map_area_free(smap); ++ put_callchain_buffers(); ++} ++ ++const struct bpf_map_ops stack_trace_map_ops = { ++ .map_alloc = stack_map_alloc, ++ .map_free = stack_map_free, ++ .map_get_next_key = stack_map_get_next_key, ++ .map_lookup_elem = stack_map_lookup_elem, ++ .map_update_elem = stack_map_update_elem, ++ .map_delete_elem = stack_map_delete_elem, ++ .map_check_btf = map_check_no_btf, ++}; ++ ++static int __init stack_map_init(void) ++{ ++ int cpu; ++ struct stack_map_irq_work *work; ++ ++ for_each_possible_cpu(cpu) { ++ work = per_cpu_ptr(&up_read_work, cpu); ++ init_irq_work(&work->irq_work, do_up_read); ++ } ++ return 0; ++} ++subsys_initcall(stack_map_init); +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -1,106 +1,333 @@ ++// SPDX-License-Identifier: GPL-2.0-only + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of version 2 of the GNU General Public +- * License as published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, but +- * WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. + */ + #include ++#include ++#include + #include + #include ++#include ++#include + #include ++#include + #include ++#include + #include + #include + #include ++#include ++#include ++#include ++#include ++#include ++ ++#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ ++ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ ++ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ ++ (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) ++#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) ++#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map)) ++ ++#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) ++ ++DEFINE_PER_CPU(int, bpf_prog_active); ++static DEFINE_IDR(prog_idr); ++static DEFINE_SPINLOCK(prog_idr_lock); ++static DEFINE_IDR(map_idr); ++static DEFINE_SPINLOCK(map_idr_lock); + + int sysctl_unprivileged_bpf_disabled __read_mostly; + +-static LIST_HEAD(bpf_map_types); ++static const struct bpf_map_ops * const bpf_map_types[] = { ++#define BPF_PROG_TYPE(_id, _ops) ++#define BPF_MAP_TYPE(_id, _ops) \ ++ [_id] = &_ops, ++#include ++#undef BPF_PROG_TYPE ++#undef BPF_MAP_TYPE ++}; ++ ++/* ++ * If we're handed a bigger struct than we know of, ensure all the unknown bits ++ * are 0 - i.e. new user-space does not rely on any kernel feature extensions ++ * we don't know about yet. ++ * ++ * There is a ToCToU between this function call and the following ++ * copy_from_user() call. However, this is not a concern since this function is ++ * meant to be a future-proofing of bits. 
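In practice this lets userspace zero the whole union before filling in the fields it knows about: older kernels then see zeroes in the tail and newer kernels see no stray bits. The raw-syscall sketch below is illustrative only and not part of the patch; it assumes a <linux/bpf.h> that already defines BPF_MAP_TYPE_QUEUE and BPF_MAP_LOOKUP_AND_DELETE_ELEM, and sys_bpf()/queue_demo() are invented helper names. It also shows how the new queue map is driven from the syscall side: elements are pushed with BPF_MAP_UPDATE_ELEM (no key; flags 0 or BPF_EXIST) and popped with BPF_MAP_LOOKUP_AND_DELETE_ELEM.

/* Illustrative userspace sketch: create a queue map, push one element,
 * pop it back.  Every command zero-initializes the full union bpf_attr.
 */
#include <linux/bpf.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr)
{
	return (int)syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int queue_demo(void)
{
	union bpf_attr attr;
	uint32_t val = 42, out = 0;
	int map_fd;

	/* zero the whole union: unknown tail fields must be 0 */
	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_QUEUE;
	attr.key_size    = 0;			/* queue/stack maps have no key */
	attr.value_size  = sizeof(val);
	attr.max_entries = 16;
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr);
	if (map_fd < 0)
		return -1;

	/* push: BPF_MAP_UPDATE_ELEM with the key pointer left NULL */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = (uint32_t)map_fd;
	attr.value  = (uint64_t)(unsigned long)&val;
	attr.flags  = 0;			/* or BPF_EXIST to overwrite the oldest entry */
	if (sys_bpf(BPF_MAP_UPDATE_ELEM, &attr))
		return -1;

	/* pop: BPF_MAP_LOOKUP_AND_DELETE_ELEM fills the value buffer */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = (uint32_t)map_fd;
	attr.value  = (uint64_t)(unsigned long)&out;
	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr);
}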
++ */ ++int bpf_check_uarg_tail_zero(void __user *uaddr, ++ size_t expected_size, ++ size_t actual_size) ++{ ++ unsigned char __user *addr; ++ unsigned char __user *end; ++ unsigned char val; ++ int err; ++ ++ if (unlikely(actual_size > PAGE_SIZE)) /* silly large */ ++ return -E2BIG; ++ ++ if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size))) ++ return -EFAULT; ++ ++ if (actual_size <= expected_size) ++ return 0; ++ ++ addr = uaddr + expected_size; ++ end = uaddr + actual_size; ++ ++ for (; addr < end; addr++) { ++ err = get_user(val, addr); ++ if (err) ++ return err; ++ if (val) ++ return -E2BIG; ++ } ++ ++ return 0; ++} + + static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) + { +- struct bpf_map_type_list *tl; ++ const struct bpf_map_ops *ops; ++ u32 type = attr->map_type; + struct bpf_map *map; ++ int err; + +- list_for_each_entry(tl, &bpf_map_types, list_node) { +- if (tl->type == attr->map_type) { +- map = tl->ops->map_alloc(attr); +- if (IS_ERR(map)) +- return map; +- map->ops = tl->ops; +- map->map_type = attr->map_type; +- return map; +- } ++ if (type >= ARRAY_SIZE(bpf_map_types)) ++ return ERR_PTR(-EINVAL); ++ ops = bpf_map_types[type]; ++ if (!ops) ++ return ERR_PTR(-EINVAL); ++ ++ if (ops->map_alloc_check) { ++ err = ops->map_alloc_check(attr); ++ if (err) ++ return ERR_PTR(err); ++ } ++ map = ops->map_alloc(attr); ++ if (IS_ERR(map)) ++ return map; ++ map->ops = ops; ++ map->map_type = type; ++ return map; ++} ++ ++void *bpf_map_area_alloc(u64 size, int numa_node) ++{ ++ /* We really just want to fail instead of triggering OOM killer ++ * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, ++ * which is used for lower order allocation requests. ++ * ++ * It has been observed that higher order allocation requests done by ++ * vmalloc with __GFP_NORETRY being set might fail due to not trying ++ * to reclaim memory from the page cache, thus we set ++ * __GFP_RETRY_MAYFAIL to avoid such situations. ++ */ ++ ++ const gfp_t flags = __GFP_NOWARN | __GFP_ZERO; ++ void *area; ++ ++ if (size >= SIZE_MAX) ++ return NULL; ++ ++ if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { ++ area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags, ++ numa_node); ++ if (area != NULL) ++ return area; + } +- return ERR_PTR(-EINVAL); ++ ++ return __vmalloc_node_range(size, 1, ++ VMALLOC_START, VMALLOC_END, ++ GFP_KERNEL | flags, ++ PAGE_KERNEL, 0, numa_node, ++ __builtin_return_address(0)); + } + +-/* boot time registration of different map implementations */ +-void bpf_register_map_type(struct bpf_map_type_list *tl) ++void bpf_map_area_free(void *area) + { +- list_add(&tl->list_node, &bpf_map_types); ++ kvfree(area); + } + +-static int bpf_map_charge_memlock(struct bpf_map *map) ++static u32 bpf_map_flags_retain_permanent(u32 flags) + { +- struct user_struct *user = get_current_user(); +- unsigned long memlock_limit; ++ /* Some map creation flags are not tied to the map object but ++ * rather to the map fd instead, so they have no meaning upon ++ * map object inspection since multiple file descriptors with ++ * different (access) properties can exist here. Thus, given ++ * this has zero meaning for the map itself, lets clear these ++ * from here. 
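The practical consequence is that BPF_F_RDONLY / BPF_F_WRONLY only restrict the file descriptor they were requested with; fdinfo reports map_flags with those bits cleared. A hedged sketch of the visible behaviour (not part of the patch; rdonly_fd_demo is an invented name) in which the update is expected to fail with EPERM because the returned fd lacks FMODE_CAN_WRITE:

/* Illustrative sketch: a map created with BPF_F_RDONLY hands back a
 * read-only fd, so writes through that fd are rejected.
 */
#include <errno.h>
#include <linux/bpf.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int rdonly_fd_demo(void)
{
	union bpf_attr attr;
	uint32_t key = 0, val = 1;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_ARRAY;
	attr.key_size    = sizeof(key);
	attr.value_size  = sizeof(val);
	attr.max_entries = 1;
	attr.map_flags   = BPF_F_RDONLY;	/* scoped to this fd, not the map */
	fd = (int)syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (fd < 0)
		return -1;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = (uint32_t)fd;
	attr.key    = (uint64_t)(unsigned long)&key;
	attr.value  = (uint64_t)(unsigned long)&val;
	if (syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) == 0)
		return -1;			/* unexpectedly writable */
	return (errno == EPERM) ? 0 : -1;
}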
++ */ ++ return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); ++} ++ ++void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) ++{ ++ map->map_type = attr->map_type; ++ map->key_size = attr->key_size; ++ map->value_size = attr->value_size; ++ map->max_entries = attr->max_entries; ++ map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); ++ map->numa_node = bpf_map_attr_numa_node(attr); ++} ++ ++static int bpf_charge_memlock(struct user_struct *user, u32 pages) ++{ ++ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; ++ ++ if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) { ++ atomic_long_sub(pages, &user->locked_vm); ++ return -EPERM; ++ } ++ return 0; ++} ++ ++static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) ++{ ++ if (user) ++ atomic_long_sub(pages, &user->locked_vm); ++} + +- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; ++int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size) ++{ ++ u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; ++ struct user_struct *user; ++ int ret; + +- atomic_long_add(map->pages, &user->locked_vm); ++ if (size >= U32_MAX - PAGE_SIZE) ++ return -E2BIG; + +- if (atomic_long_read(&user->locked_vm) > memlock_limit) { +- atomic_long_sub(map->pages, &user->locked_vm); ++ user = get_current_user(); ++ ret = bpf_charge_memlock(user, pages); ++ if (ret) { + free_uid(user); +- return -EPERM; ++ return ret; + } +- map->user = user; ++ ++ mem->pages = pages; ++ mem->user = user; ++ + return 0; + } + +-static void bpf_map_uncharge_memlock(struct bpf_map *map) ++void bpf_map_charge_finish(struct bpf_map_memory *mem) + { +- struct user_struct *user = map->user; ++ bpf_uncharge_memlock(mem->user, mem->pages); ++ free_uid(mem->user); ++} + +- atomic_long_sub(map->pages, &user->locked_vm); +- free_uid(user); ++void bpf_map_charge_move(struct bpf_map_memory *dst, ++ struct bpf_map_memory *src) ++{ ++ *dst = *src; ++ ++ /* Make sure src will not be used for the redundant uncharging. */ ++ memset(src, 0, sizeof(struct bpf_map_memory)); ++} ++ ++int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) ++{ ++ int ret; ++ ++ ret = bpf_charge_memlock(map->memory.user, pages); ++ if (ret) ++ return ret; ++ map->memory.pages += pages; ++ return ret; ++} ++ ++void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) ++{ ++ bpf_uncharge_memlock(map->memory.user, pages); ++ map->memory.pages -= pages; ++} ++ ++static int bpf_map_alloc_id(struct bpf_map *map) ++{ ++ int id; ++ ++ idr_preload(GFP_KERNEL); ++ spin_lock_bh(&map_idr_lock); ++ id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); ++ if (id > 0) ++ map->id = id; ++ spin_unlock_bh(&map_idr_lock); ++ idr_preload_end(); ++ ++ if (WARN_ON_ONCE(!id)) ++ return -ENOSPC; ++ ++ return id > 0 ? 0 : id; ++} ++ ++void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) ++{ ++ unsigned long flags; ++ ++ /* Offloaded maps are removed from the IDR store when their device ++ * disappears - even if someone holds an fd to them they are unusable, ++ * the memory is gone, all ops will fail; they are simply waiting for ++ * refcnt to drop to be freed. 
++ */ ++ if (!map->id) ++ return; ++ ++ if (do_idr_lock) ++ spin_lock_irqsave(&map_idr_lock, flags); ++ else ++ __acquire(&map_idr_lock); ++ ++ idr_remove(&map_idr, map->id); ++ map->id = 0; ++ ++ if (do_idr_lock) ++ spin_unlock_irqrestore(&map_idr_lock, flags); ++ else ++ __release(&map_idr_lock); + } + + /* called from workqueue */ + static void bpf_map_free_deferred(struct work_struct *work) + { + struct bpf_map *map = container_of(work, struct bpf_map, work); ++ struct bpf_map_memory mem; + +- bpf_map_uncharge_memlock(map); ++ bpf_map_charge_move(&mem, &map->memory); + /* implementation dependent freeing */ + map->ops->map_free(map); ++ bpf_map_charge_finish(&mem); + } + + static void bpf_map_put_uref(struct bpf_map *map) + { + if (atomic_dec_and_test(&map->usercnt)) { +- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) +- bpf_fd_array_map_clear(map); ++ if (map->ops->map_release_uref) ++ map->ops->map_release_uref(map); + } + } + + /* decrement map refcnt and schedule it for freeing via workqueue + * (unrelying map implementation ops->map_free() might sleep) + */ +-void bpf_map_put(struct bpf_map *map) ++static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) + { + if (atomic_dec_and_test(&map->refcnt)) { ++ /* bpf_map_free_id() must be called first */ ++ bpf_map_free_id(map, do_idr_lock); ++ btf_put(map->btf); + INIT_WORK(&map->work, bpf_map_free_deferred); + schedule_work(&map->work); + } + } + ++void bpf_map_put(struct bpf_map *map) ++{ ++ __bpf_map_put(map, true); ++} ++EXPORT_SYMBOL_GPL(bpf_map_put); ++ + void bpf_map_put_with_uref(struct bpf_map *map) + { + bpf_map_put_uref(map); +@@ -109,18 +336,110 @@ void bpf_map_put_with_uref(struct bpf_ma + + static int bpf_map_release(struct inode *inode, struct file *filp) + { +- bpf_map_put_with_uref(filp->private_data); ++ struct bpf_map *map = filp->private_data; ++ ++ if (map->ops->map_release) ++ map->ops->map_release(map, filp); ++ ++ bpf_map_put_with_uref(map); + return 0; + } + +-static const struct file_operations bpf_map_fops = { +- .release = bpf_map_release, ++static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) ++{ ++ fmode_t mode = f.file->f_mode; ++ ++ /* Our file permissions may have been overridden by global ++ * map permissions facing syscall side. ++ */ ++ if (READ_ONCE(map->frozen)) ++ mode &= ~FMODE_CAN_WRITE; ++ return mode; ++} ++ ++#ifdef CONFIG_PROC_FS ++static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) ++{ ++ const struct bpf_map *map = filp->private_data; ++ const struct bpf_array *array; ++ u32 owner_prog_type = 0; ++ u32 owner_jited = 0; ++ ++ if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { ++ array = container_of(map, struct bpf_array, map); ++ owner_prog_type = array->owner_prog_type; ++ owner_jited = array->owner_jited; ++ } ++ ++ seq_printf(m, ++ "map_type:\t%u\n" ++ "key_size:\t%u\n" ++ "value_size:\t%u\n" ++ "max_entries:\t%u\n" ++ "map_flags:\t%#x\n" ++ "memlock:\t%llu\n" ++ "map_id:\t%u\n" ++ "frozen:\t%u\n", ++ map->map_type, ++ map->key_size, ++ map->value_size, ++ map->max_entries, ++ map->map_flags, ++ map->memory.pages * 1ULL << PAGE_SHIFT, ++ map->id, ++ READ_ONCE(map->frozen)); ++ ++ if (owner_prog_type) { ++ seq_printf(m, "owner_prog_type:\t%u\n", ++ owner_prog_type); ++ seq_printf(m, "owner_jited:\t%u\n", ++ owner_jited); ++ } ++} ++#endif ++ ++static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, ++ loff_t *ppos) ++{ ++ /* We need this handler such that alloc_file() enables ++ * f_mode with FMODE_CAN_READ. 
++ */ ++ return -EINVAL; ++} ++ ++static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, ++ size_t siz, loff_t *ppos) ++{ ++ /* We need this handler such that alloc_file() enables ++ * f_mode with FMODE_CAN_WRITE. ++ */ ++ return -EINVAL; ++} ++ ++const struct file_operations bpf_map_fops = { ++#ifdef CONFIG_PROC_FS ++ .show_fdinfo = bpf_map_show_fdinfo, ++#endif ++ .release = bpf_map_release, ++ .read = bpf_dummy_read, ++ .write = bpf_dummy_write, + }; + +-int bpf_map_new_fd(struct bpf_map *map) ++int bpf_map_new_fd(struct bpf_map *map, int flags) + { + return anon_inode_getfd("bpf-map", &bpf_map_fops, map, +- O_RDWR | O_CLOEXEC); ++ flags | O_CLOEXEC); ++} ++ ++int bpf_get_file_flag(int flags) ++{ ++ if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) ++ return -EINVAL; ++ if (flags & BPF_F_RDONLY) ++ return O_RDONLY; ++ if (flags & BPF_F_WRONLY) ++ return O_WRONLY; ++ return O_RDWR; + } + + /* helper macro to check that unused fields 'union bpf_attr' are zero */ +@@ -131,38 +450,171 @@ int bpf_map_new_fd(struct bpf_map *map) + offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ + sizeof(attr->CMD##_LAST_FIELD)) != NULL + +-#define BPF_MAP_CREATE_LAST_FIELD max_entries ++/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes. ++ * Return 0 on success and < 0 on error. ++ */ ++static int bpf_obj_name_cpy(char *dst, const char *src) ++{ ++ const char *end = src + BPF_OBJ_NAME_LEN; ++ ++ memset(dst, 0, BPF_OBJ_NAME_LEN); ++ /* Copy all isalnum(), '_' and '.' chars. */ ++ while (src < end && *src) { ++ if (!isalnum(*src) && ++ *src != '_' && *src != '.') ++ return -EINVAL; ++ *dst++ = *src++; ++ } ++ ++ /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */ ++ if (src == end) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++int map_check_no_btf(const struct bpf_map *map, ++ const struct btf *btf, ++ const struct btf_type *key_type, ++ const struct btf_type *value_type) ++{ ++ return -ENOTSUPP; ++} ++ ++static int map_check_btf(struct bpf_map *map, const struct btf *btf, ++ u32 btf_key_id, u32 btf_value_id) ++{ ++ const struct btf_type *key_type, *value_type; ++ u32 key_size, value_size; ++ int ret = 0; ++ ++ /* Some maps allow key to be unspecified. 
*/ ++ if (btf_key_id) { ++ key_type = btf_type_id_size(btf, &btf_key_id, &key_size); ++ if (!key_type || key_size != map->key_size) ++ return -EINVAL; ++ } else { ++ key_type = btf_type_by_id(btf, 0); ++ if (!map->ops->map_check_btf) ++ return -EINVAL; ++ } ++ ++ value_type = btf_type_id_size(btf, &btf_value_id, &value_size); ++ if (!value_type || value_size != map->value_size) ++ return -EINVAL; ++ ++ map->spin_lock_off = btf_find_spin_lock(btf, value_type); ++ ++ if (map_value_has_spin_lock(map)) { ++ if (map->map_flags & BPF_F_RDONLY_PROG) ++ return -EACCES; ++ if (map->map_type != BPF_MAP_TYPE_HASH && ++ map->map_type != BPF_MAP_TYPE_ARRAY && ++ map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && ++ map->map_type != BPF_MAP_TYPE_SK_STORAGE) ++ return -ENOTSUPP; ++ if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > ++ map->value_size) { ++ WARN_ONCE(1, ++ "verifier bug spin_lock_off %d value_size %d\n", ++ map->spin_lock_off, map->value_size); ++ return -EFAULT; ++ } ++ } ++ ++ if (map->ops->map_check_btf) ++ ret = map->ops->map_check_btf(map, btf, key_type, value_type); ++ ++ return ret; ++} ++ ++#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id + /* called via syscall */ + static int map_create(union bpf_attr *attr) + { ++ int numa_node = bpf_map_attr_numa_node(attr); ++ struct bpf_map_memory mem; + struct bpf_map *map; ++ int f_flags; + int err; + + err = CHECK_ATTR(BPF_MAP_CREATE); + if (err) + return -EINVAL; + ++ f_flags = bpf_get_file_flag(attr->map_flags); ++ if (f_flags < 0) ++ return f_flags; ++ ++ if (numa_node != NUMA_NO_NODE && ++ ((unsigned int)numa_node >= nr_node_ids || ++ !node_online(numa_node))) ++ return -EINVAL; ++ + /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ + map = find_and_alloc_map(attr); + if (IS_ERR(map)) + return PTR_ERR(map); + ++ err = bpf_obj_name_cpy(map->name, attr->map_name); ++ if (err) ++ goto free_map; ++ + atomic_set(&map->refcnt, 1); + atomic_set(&map->usercnt, 1); + +- err = bpf_map_charge_memlock(map); ++ if (attr->btf_key_type_id || attr->btf_value_type_id) { ++ struct btf *btf; ++ ++ if (!attr->btf_value_type_id) { ++ err = -EINVAL; ++ goto free_map; ++ } ++ ++ btf = btf_get_by_fd(attr->btf_fd); ++ if (IS_ERR(btf)) { ++ err = PTR_ERR(btf); ++ goto free_map; ++ } ++ ++ err = map_check_btf(map, btf, attr->btf_key_type_id, ++ attr->btf_value_type_id); ++ if (err) { ++ btf_put(btf); ++ goto free_map; ++ } ++ ++ map->btf = btf; ++ map->btf_key_type_id = attr->btf_key_type_id; ++ map->btf_value_type_id = attr->btf_value_type_id; ++ } else { ++ map->spin_lock_off = -EINVAL; ++ } ++ ++ err = bpf_map_alloc_id(map); + if (err) +- goto free_map; ++ goto free_map_sec; + +- err = bpf_map_new_fd(map); +- if (err < 0) +- /* failed to allocate fd */ +- goto free_map; ++ err = bpf_map_new_fd(map, f_flags); ++ if (err < 0) { ++ /* failed to allocate fd. ++ * bpf_map_put_with_uref() is needed because the above ++ * bpf_map_alloc_id() has published the map ++ * to the userspace and the userspace may ++ * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 
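The ID published here is what the BPF_MAP_GET_NEXT_ID and BPF_MAP_GET_FD_BY_ID commands operate on. A hedged sketch of walking all map IDs from userspace (not part of the patch; it assumes those commands exist in the backported UAPI, needs CAP_SYS_ADMIN, and walk_map_ids is an invented name):

/* Illustrative sketch: enumerate loaded maps by ID and briefly open an
 * fd for each one, which takes the extra reference mentioned above.
 */
#include <linux/bpf.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

void walk_map_ids(void)
{
	union bpf_attr attr;
	uint32_t id = 0;
	int fd;

	for (;;) {
		memset(&attr, 0, sizeof(attr));
		attr.start_id = id;
		if (syscall(__NR_bpf, BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr)))
			break;			/* ENOENT: walked past the last ID */
		id = attr.next_id;

		memset(&attr, 0, sizeof(attr));
		attr.map_id = id;
		fd = (int)syscall(__NR_bpf, BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
		if (fd >= 0)
			close(fd);		/* drops the reference again */
	}
}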
++ */ ++ bpf_map_put_with_uref(map); ++ return err; ++ } + + return err; + ++free_map_sec: + free_map: ++ btf_put(map->btf); ++ bpf_map_charge_move(&mem, &map->memory); + map->ops->map_free(map); ++ bpf_map_charge_finish(&mem); + return err; + } + +@@ -194,6 +646,7 @@ struct bpf_map *bpf_map_inc(struct bpf_m + atomic_inc(&map->usercnt); + return map; + } ++EXPORT_SYMBOL_GPL(bpf_map_inc); + + struct bpf_map *bpf_map_get_with_uref(u32 ufd) + { +@@ -210,59 +663,155 @@ struct bpf_map *bpf_map_get_with_uref(u3 + return map; + } + +-/* helper to convert user pointers passed inside __aligned_u64 fields */ +-static void __user *u64_to_ptr(__u64 val) ++/* map_idr_lock should have been held */ ++static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, ++ bool uref) ++{ ++ int refold; ++ ++ refold = atomic_fetch_add_unless(&map->refcnt, 1, 0); ++ ++ if (refold >= BPF_MAX_REFCNT) { ++ __bpf_map_put(map, false); ++ return ERR_PTR(-EBUSY); ++ } ++ ++ if (!refold) ++ return ERR_PTR(-ENOENT); ++ ++ if (uref) ++ atomic_inc(&map->usercnt); ++ ++ return map; ++} ++ ++struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref) ++{ ++ spin_lock_bh(&map_idr_lock); ++ map = __bpf_map_inc_not_zero(map, uref); ++ spin_unlock_bh(&map_idr_lock); ++ ++ return map; ++} ++EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); ++ ++int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) ++{ ++ return -ENOTSUPP; ++} ++ ++static void *__bpf_copy_key(void __user *ukey, u64 key_size) + { +- return (void __user *) (unsigned long) val; ++ if (key_size) ++ return memdup_user(ukey, key_size); ++ ++ if (ukey) ++ return ERR_PTR(-EINVAL); ++ ++ return NULL; + } + + /* last field in 'union bpf_attr' used by this command */ +-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value ++#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags + + static int map_lookup_elem(union bpf_attr *attr) + { +- void __user *ukey = u64_to_ptr(attr->key); +- void __user *uvalue = u64_to_ptr(attr->value); ++ void __user *ukey = u64_to_user_ptr(attr->key); ++ void __user *uvalue = u64_to_user_ptr(attr->value); + int ufd = attr->map_fd; + struct bpf_map *map; + void *key, *value, *ptr; ++ u32 value_size; + struct fd f; + int err; + + if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) + return -EINVAL; + ++ if (attr->flags & ~BPF_F_LOCK) ++ return -EINVAL; ++ + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); ++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { ++ err = -EPERM; ++ goto err_put; ++ } + +- err = -ENOMEM; +- key = kmalloc(map->key_size, GFP_USER); +- if (!key) ++ if ((attr->flags & BPF_F_LOCK) && ++ !map_value_has_spin_lock(map)) { ++ err = -EINVAL; + goto err_put; ++ } + +- err = -EFAULT; +- if (copy_from_user(key, ukey, map->key_size) != 0) +- goto free_key; ++ key = __bpf_copy_key(ukey, map->key_size); ++ if (IS_ERR(key)) { ++ err = PTR_ERR(key); ++ goto err_put; ++ } ++ ++ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || ++ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || ++ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) ++ value_size = round_up(map->value_size, 8) * num_possible_cpus(); ++ else if (IS_FD_MAP(map)) ++ value_size = sizeof(u32); ++ else ++ value_size = map->value_size; + + err = -ENOMEM; +- value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); ++ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); + if (!value) + goto free_key; + +- rcu_read_lock(); +- ptr = map->ops->map_lookup_elem(map, key); +- if (ptr) +- memcpy(value, ptr, map->value_size); +- rcu_read_unlock(); ++ preempt_disable(); ++ 
this_cpu_inc(bpf_prog_active); ++ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || ++ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { ++ err = bpf_percpu_hash_copy(map, key, value); ++ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { ++ err = bpf_percpu_array_copy(map, key, value); ++ } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { ++ err = bpf_stackmap_copy(map, key, value); ++ } else if (IS_FD_ARRAY(map)) { ++ err = bpf_fd_array_map_lookup_elem(map, key, value); ++ } else if (IS_FD_HASH(map)) { ++ err = bpf_fd_htab_map_lookup_elem(map, key, value); ++ } else if (map->map_type == BPF_MAP_TYPE_QUEUE || ++ map->map_type == BPF_MAP_TYPE_STACK) { ++ err = map->ops->map_peek_elem(map, value); ++ } else { ++ rcu_read_lock(); ++ if (map->ops->map_lookup_elem_sys_only) ++ ptr = map->ops->map_lookup_elem_sys_only(map, key); ++ else ++ ptr = map->ops->map_lookup_elem(map, key); ++ if (IS_ERR(ptr)) { ++ err = PTR_ERR(ptr); ++ } else if (!ptr) { ++ err = -ENOENT; ++ } else { ++ err = 0; ++ if (attr->flags & BPF_F_LOCK) ++ /* lock 'ptr' and copy everything but lock */ ++ copy_map_value_locked(map, value, ptr, true); ++ else ++ copy_map_value(map, value, ptr); ++ /* mask lock, since value wasn't zero inited */ ++ check_and_init_map_lock(map, value); ++ } ++ rcu_read_unlock(); ++ } ++ this_cpu_dec(bpf_prog_active); ++ preempt_enable(); + +- err = -ENOENT; +- if (!ptr) ++ if (err) + goto free_value; + + err = -EFAULT; +- if (copy_to_user(uvalue, value, map->value_size) != 0) ++ if (copy_to_user(uvalue, value, value_size) != 0) + goto free_value; + + err = 0; +@@ -276,15 +825,27 @@ err_put: + return err; + } + ++static void maybe_wait_bpf_programs(struct bpf_map *map) ++{ ++ /* Wait for any running BPF programs to complete so that ++ * userspace, when we return to it, knows that all programs ++ * that could be running use the new map value. 
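A case where this wait matters is a map-in-map update from the syscall side: the value written is the inner map's file descriptor, and the synchronize_rcu() guarantees that by the time the syscall returns no program can still be using the previous inner map. A hedged sketch (not part of the patch; set_inner_map is an invented name, and outer_fd/inner_fd are assumed to come from earlier BPF_MAP_CREATE calls):

/* Illustrative sketch: swap the inner map at index 0 of a
 * BPF_MAP_TYPE_ARRAY_OF_MAPS outer map.
 */
#include <linux/bpf.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int set_inner_map(int outer_fd, int inner_fd)
{
	union bpf_attr attr;
	uint32_t key = 0;
	uint32_t fd_val = (uint32_t)inner_fd;	/* the value is the inner map's fd */

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = (uint32_t)outer_fd;
	attr.key    = (uint64_t)(unsigned long)&key;
	attr.value  = (uint64_t)(unsigned long)&fd_val;
	/* on return, no program can still be using the old inner map */
	return (int)syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}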
++ */ ++ if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || ++ map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) ++ synchronize_rcu(); ++} ++ + #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags + + static int map_update_elem(union bpf_attr *attr) + { +- void __user *ukey = u64_to_ptr(attr->key); +- void __user *uvalue = u64_to_ptr(attr->value); ++ void __user *ukey = u64_to_user_ptr(attr->key); ++ void __user *uvalue = u64_to_user_ptr(attr->value); + int ufd = attr->map_fd; + struct bpf_map *map; + void *key, *value; ++ u32 value_size; + struct fd f; + int err; + +@@ -295,32 +856,79 @@ static int map_update_elem(union bpf_att + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); ++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { ++ err = -EPERM; ++ goto err_put; ++ } + +- err = -ENOMEM; +- key = kmalloc(map->key_size, GFP_USER); +- if (!key) ++ if ((attr->flags & BPF_F_LOCK) && ++ !map_value_has_spin_lock(map)) { ++ err = -EINVAL; ++ goto err_put; ++ } ++ ++ key = __bpf_copy_key(ukey, map->key_size); ++ if (IS_ERR(key)) { ++ err = PTR_ERR(key); + goto err_put; ++ } + +- err = -EFAULT; +- if (copy_from_user(key, ukey, map->key_size) != 0) +- goto free_key; ++ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || ++ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || ++ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) ++ value_size = round_up(map->value_size, 8) * num_possible_cpus(); ++ else ++ value_size = map->value_size; + + err = -ENOMEM; +- value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); ++ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); + if (!value) + goto free_key; + + err = -EFAULT; +- if (copy_from_user(value, uvalue, map->value_size) != 0) ++ if (copy_from_user(value, uvalue, value_size) != 0) + goto free_value; + +- /* eBPF program that use maps are running under rcu_read_lock(), +- * therefore all map accessors rely on this fact, so do the same here +- */ +- rcu_read_lock(); +- err = map->ops->map_update_elem(map, key, value, attr->flags); +- rcu_read_unlock(); ++ /* Need to create a kthread, thus must support schedule */ ++ if (map->map_type == BPF_MAP_TYPE_CPUMAP || ++ map->map_type == BPF_MAP_TYPE_SOCKHASH || ++ map->map_type == BPF_MAP_TYPE_SOCKMAP) { ++ err = map->ops->map_update_elem(map, key, value, attr->flags); ++ goto out; ++ } + ++ /* must increment bpf_prog_active to avoid kprobe+bpf triggering from ++ * inside bpf map update or delete otherwise deadlocks are possible ++ */ ++ preempt_disable(); ++ __this_cpu_inc(bpf_prog_active); ++ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || ++ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { ++ err = bpf_percpu_hash_update(map, key, value, attr->flags); ++ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { ++ err = bpf_percpu_array_update(map, key, value, attr->flags); ++ } else if (IS_FD_ARRAY(map)) { ++ rcu_read_lock(); ++ err = bpf_fd_array_map_update_elem(map, f.file, key, value, ++ attr->flags); ++ rcu_read_unlock(); ++ } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { ++ rcu_read_lock(); ++ err = bpf_fd_htab_map_update_elem(map, f.file, key, value, ++ attr->flags); ++ rcu_read_unlock(); ++ } else if (map->map_type == BPF_MAP_TYPE_QUEUE || ++ map->map_type == BPF_MAP_TYPE_STACK) { ++ err = map->ops->map_push_elem(map, value, attr->flags); ++ } else { ++ rcu_read_lock(); ++ err = map->ops->map_update_elem(map, key, value, attr->flags); ++ rcu_read_unlock(); ++ } ++ __this_cpu_dec(bpf_prog_active); ++ preempt_enable(); ++ maybe_wait_bpf_programs(map); ++out: + free_value: + kfree(value); + 
free_key: +@@ -334,7 +942,7 @@ err_put: + + static int map_delete_elem(union bpf_attr *attr) + { +- void __user *ukey = u64_to_ptr(attr->key); ++ void __user *ukey = u64_to_user_ptr(attr->key); + int ufd = attr->map_fd; + struct bpf_map *map; + struct fd f; +@@ -348,21 +956,25 @@ static int map_delete_elem(union bpf_att + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); +- +- err = -ENOMEM; +- key = kmalloc(map->key_size, GFP_USER); +- if (!key) ++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { ++ err = -EPERM; + goto err_put; ++ } + +- err = -EFAULT; +- if (copy_from_user(key, ukey, map->key_size) != 0) +- goto free_key; ++ key = __bpf_copy_key(ukey, map->key_size); ++ if (IS_ERR(key)) { ++ err = PTR_ERR(key); ++ goto err_put; ++ } + ++ preempt_disable(); ++ __this_cpu_inc(bpf_prog_active); + rcu_read_lock(); + err = map->ops->map_delete_elem(map, key); + rcu_read_unlock(); +- +-free_key: ++ __this_cpu_dec(bpf_prog_active); ++ preempt_enable(); ++ maybe_wait_bpf_programs(map); + kfree(key); + err_put: + fdput(f); +@@ -374,8 +986,8 @@ err_put: + + static int map_get_next_key(union bpf_attr *attr) + { +- void __user *ukey = u64_to_ptr(attr->key); +- void __user *unext_key = u64_to_ptr(attr->next_key); ++ void __user *ukey = u64_to_user_ptr(attr->key); ++ void __user *unext_key = u64_to_user_ptr(attr->next_key); + int ufd = attr->map_fd; + struct bpf_map *map; + void *key, *next_key; +@@ -389,15 +1001,20 @@ static int map_get_next_key(union bpf_at + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); +- +- err = -ENOMEM; +- key = kmalloc(map->key_size, GFP_USER); +- if (!key) ++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { ++ err = -EPERM; + goto err_put; ++ } + +- err = -EFAULT; +- if (copy_from_user(key, ukey, map->key_size) != 0) +- goto free_key; ++ if (ukey) { ++ key = __bpf_copy_key(ukey, map->key_size); ++ if (IS_ERR(key)) { ++ err = PTR_ERR(key); ++ goto err_put; ++ } ++ } else { ++ key = NULL; ++ } + + err = -ENOMEM; + next_key = kmalloc(map->key_size, GFP_USER); +@@ -425,77 +1042,126 @@ err_put: + return err; + } + +-static LIST_HEAD(bpf_prog_types); ++#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value + +-static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) ++static int map_lookup_and_delete_elem(union bpf_attr *attr) + { +- struct bpf_prog_type_list *tl; ++ void __user *ukey = u64_to_user_ptr(attr->key); ++ void __user *uvalue = u64_to_user_ptr(attr->value); ++ int ufd = attr->map_fd; ++ struct bpf_map *map; ++ void *key, *value; ++ u32 value_size; ++ struct fd f; ++ int err; + +- list_for_each_entry(tl, &bpf_prog_types, list_node) { +- if (tl->type == type) { +- prog->aux->ops = tl->ops; +- prog->type = type; +- return 0; +- } ++ if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) ++ return -EINVAL; ++ ++ f = fdget(ufd); ++ map = __bpf_map_get(f); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); ++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || ++ !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { ++ err = -EPERM; ++ goto err_put; + } + +- return -EINVAL; +-} ++ key = __bpf_copy_key(ukey, map->key_size); ++ if (IS_ERR(key)) { ++ err = PTR_ERR(key); ++ goto err_put; ++ } + +-void bpf_register_prog_type(struct bpf_prog_type_list *tl) +-{ +- list_add(&tl->list_node, &bpf_prog_types); ++ value_size = map->value_size; ++ ++ err = -ENOMEM; ++ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); ++ if (!value) ++ goto free_key; ++ ++ if (map->map_type == BPF_MAP_TYPE_QUEUE || ++ map->map_type == BPF_MAP_TYPE_STACK) { 
++ err = map->ops->map_pop_elem(map, value); ++ } else { ++ err = -ENOTSUPP; ++ } ++ ++ if (err) ++ goto free_value; ++ ++ if (copy_to_user(uvalue, value, value_size) != 0) { ++ err = -EFAULT; ++ goto free_value; ++ } ++ ++ err = 0; ++ ++free_value: ++ kfree(value); ++free_key: ++ kfree(key); ++err_put: ++ fdput(f); ++ return err; + } + +-/* fixup insn->imm field of bpf_call instructions: +- * if (insn->imm == BPF_FUNC_map_lookup_elem) +- * insn->imm = bpf_map_lookup_elem - __bpf_call_base; +- * else if (insn->imm == BPF_FUNC_map_update_elem) +- * insn->imm = bpf_map_update_elem - __bpf_call_base; +- * else ... +- * +- * this function is called after eBPF program passed verification +- */ +-static void fixup_bpf_calls(struct bpf_prog *prog) ++#define BPF_MAP_FREEZE_LAST_FIELD map_fd ++ ++static int map_freeze(const union bpf_attr *attr) + { +- const struct bpf_func_proto *fn; +- int i; ++ int err = 0, ufd = attr->map_fd; ++ struct bpf_map *map; ++ struct fd f; + +- for (i = 0; i < prog->len; i++) { +- struct bpf_insn *insn = &prog->insnsi[i]; ++ if (CHECK_ATTR(BPF_MAP_FREEZE)) ++ return -EINVAL; + +- if (insn->code == (BPF_JMP | BPF_CALL)) { +- /* we reach here when program has bpf_call instructions +- * and it passed bpf_check(), means that +- * ops->get_func_proto must have been supplied, check it +- */ +- BUG_ON(!prog->aux->ops->get_func_proto); ++ f = fdget(ufd); ++ map = __bpf_map_get(f); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); ++ if (READ_ONCE(map->frozen)) { ++ err = -EBUSY; ++ goto err_put; ++ } ++ if (!capable(CAP_SYS_ADMIN)) { ++ err = -EPERM; ++ goto err_put; ++ } + +- if (insn->imm == BPF_FUNC_get_route_realm) +- prog->dst_needed = 1; +- if (insn->imm == BPF_FUNC_get_prandom_u32) +- bpf_user_rnd_init_once(); +- if (insn->imm == BPF_FUNC_tail_call) { +- /* mark bpf_tail_call as different opcode +- * to avoid conditional branch in +- * interpeter for every normal call +- * and to prevent accidental JITing by +- * JIT compiler that doesn't support +- * bpf_tail_call yet +- */ +- insn->imm = 0; +- insn->code |= BPF_X; +- continue; +- } ++ WRITE_ONCE(map->frozen, true); ++err_put: ++ fdput(f); ++ return err; ++} + +- fn = prog->aux->ops->get_func_proto(insn->imm); +- /* all functions that have prototype and verifier allowed +- * programs to call them, must be real in-kernel functions +- */ +- BUG_ON(!fn->func); +- insn->imm = fn->func - __bpf_call_base; +- } +- } ++static const struct bpf_prog_ops * const bpf_prog_types[] = { ++#define BPF_PROG_TYPE(_id, _name) \ ++ [_id] = & _name ## _prog_ops, ++#define BPF_MAP_TYPE(_id, _ops) ++#include ++#undef BPF_PROG_TYPE ++#undef BPF_MAP_TYPE ++}; ++ ++static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) ++{ ++ const struct bpf_prog_ops *ops; ++ ++ if (type >= ARRAY_SIZE(bpf_prog_types)) ++ return -EINVAL; ++ ops = bpf_prog_types[type]; ++ if (!ops) ++ return -EINVAL; ++ ++ if (!bpf_prog_is_dev_bound(prog->aux)) ++ prog->aux->ops = ops; ++ else ++ return -EINVAL; ++ prog->type = type; ++ return 0; + } + + /* drop refcnt on maps used by eBPF program and free auxilary data */ +@@ -509,19 +1175,39 @@ static void free_used_maps(struct bpf_pr + kfree(aux->used_maps); + } + ++int __bpf_prog_charge(struct user_struct *user, u32 pages) ++{ ++ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; ++ unsigned long user_bufs; ++ ++ if (user) { ++ user_bufs = atomic_long_add_return(pages, &user->locked_vm); ++ if (user_bufs > memlock_limit) { ++ atomic_long_sub(pages, &user->locked_vm); ++ return -EPERM; ++ } ++ } 
++ ++ return 0; ++} ++ ++void __bpf_prog_uncharge(struct user_struct *user, u32 pages) ++{ ++ if (user) ++ atomic_long_sub(pages, &user->locked_vm); ++} ++ + static int bpf_prog_charge_memlock(struct bpf_prog *prog) + { + struct user_struct *user = get_current_user(); +- unsigned long memlock_limit; ++ int ret; + +- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; +- +- atomic_long_add(prog->pages, &user->locked_vm); +- if (atomic_long_read(&user->locked_vm) > memlock_limit) { +- atomic_long_sub(prog->pages, &user->locked_vm); ++ ret = __bpf_prog_charge(user, prog->pages); ++ if (ret) { + free_uid(user); +- return -EPERM; ++ return ret; + } ++ + prog->aux->user = user; + return 0; + } +@@ -530,30 +1216,87 @@ static void bpf_prog_uncharge_memlock(st + { + struct user_struct *user = prog->aux->user; + +- atomic_long_sub(prog->pages, &user->locked_vm); ++ __bpf_prog_uncharge(user, prog->pages); + free_uid(user); + } + +-static void __prog_put_common(struct rcu_head *rcu) ++static int bpf_prog_alloc_id(struct bpf_prog *prog) ++{ ++ int id; ++ ++ idr_preload(GFP_KERNEL); ++ spin_lock_bh(&prog_idr_lock); ++ id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); ++ if (id > 0) ++ prog->aux->id = id; ++ spin_unlock_bh(&prog_idr_lock); ++ idr_preload_end(); ++ ++ /* id is in [1, INT_MAX) */ ++ if (WARN_ON_ONCE(!id)) ++ return -ENOSPC; ++ ++ return id > 0 ? 0 : id; ++} ++ ++void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) ++{ ++ /* cBPF to eBPF migrations are currently not in the idr store. ++ * Offloaded programs are removed from the store when their device ++ * disappears - even if someone grabs an fd to them they are unusable, ++ * simply waiting for refcnt to drop to be freed. ++ */ ++ if (!prog->aux->id) ++ return; ++ ++ if (do_idr_lock) ++ spin_lock_bh(&prog_idr_lock); ++ else ++ __acquire(&prog_idr_lock); ++ ++ idr_remove(&prog_idr, prog->aux->id); ++ prog->aux->id = 0; ++ ++ if (do_idr_lock) ++ spin_unlock_bh(&prog_idr_lock); ++ else ++ __release(&prog_idr_lock); ++} ++ ++static void __bpf_prog_put_rcu(struct rcu_head *rcu) + { + struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); + ++ kvfree(aux->func_info); + free_used_maps(aux); + bpf_prog_uncharge_memlock(aux->prog); + bpf_prog_free(aux->prog); + } + +-/* version of bpf_prog_put() that is called after a grace period */ +-void bpf_prog_put_rcu(struct bpf_prog *prog) ++static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) ++{ ++ bpf_prog_kallsyms_del_all(prog); ++ btf_put(prog->aux->btf); ++ bpf_prog_free_linfo(prog); ++ ++ if (deferred) ++ call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); ++ else ++ __bpf_prog_put_rcu(&prog->aux->rcu); ++} ++ ++static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) + { +- if (atomic_dec_and_test(&prog->aux->refcnt)) +- call_rcu(&prog->aux->rcu, __prog_put_common); ++ if (atomic_dec_and_test(&prog->aux->refcnt)) { ++ /* bpf_prog_free_id() must be called first */ ++ bpf_prog_free_id(prog, do_idr_lock); ++ __bpf_prog_put_noref(prog, true); ++ } + } + + void bpf_prog_put(struct bpf_prog *prog) + { +- if (atomic_dec_and_test(&prog->aux->refcnt)) +- __prog_put_common(&prog->aux->rcu); ++ __bpf_prog_put(prog, true); + } + EXPORT_SYMBOL_GPL(bpf_prog_put); + +@@ -561,12 +1304,68 @@ static int bpf_prog_release(struct inode + { + struct bpf_prog *prog = filp->private_data; + +- bpf_prog_put_rcu(prog); ++ bpf_prog_put(prog); + return 0; + } + +-static const struct file_operations bpf_prog_fops = { +- .release = bpf_prog_release, ++static void 
bpf_prog_get_stats(const struct bpf_prog *prog, ++ struct bpf_prog_stats *stats) ++{ ++ u64 nsecs = 0, cnt = 0; ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ const struct bpf_prog_stats *st; ++ unsigned int start; ++ u64 tnsecs, tcnt; ++ ++ st = per_cpu_ptr(prog->aux->stats, cpu); ++ do { ++ start = u64_stats_fetch_begin_irq(&st->syncp); ++ tnsecs = st->nsecs; ++ tcnt = st->cnt; ++ } while (u64_stats_fetch_retry_irq(&st->syncp, start)); ++ nsecs += tnsecs; ++ cnt += tcnt; ++ } ++ stats->nsecs = nsecs; ++ stats->cnt = cnt; ++} ++ ++#ifdef CONFIG_PROC_FS ++static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) ++{ ++ const struct bpf_prog *prog = filp->private_data; ++ char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; ++ struct bpf_prog_stats stats; ++ ++ bpf_prog_get_stats(prog, &stats); ++ bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); ++ seq_printf(m, ++ "prog_type:\t%u\n" ++ "prog_jited:\t%u\n" ++ "prog_tag:\t%s\n" ++ "memlock:\t%llu\n" ++ "prog_id:\t%u\n" ++ "run_time_ns:\t%llu\n" ++ "run_cnt:\t%llu\n", ++ prog->type, ++ prog->jited, ++ prog_tag, ++ prog->pages * 1ULL << PAGE_SHIFT, ++ prog->aux->id, ++ stats.nsecs, ++ stats.cnt); ++} ++#endif ++ ++const struct file_operations bpf_prog_fops = { ++#ifdef CONFIG_PROC_FS ++ .show_fdinfo = bpf_prog_show_fdinfo, ++#endif ++ .release = bpf_prog_release, ++ .read = bpf_dummy_read, ++ .write = bpf_dummy_write, + }; + + int bpf_prog_new_fd(struct bpf_prog *prog) +@@ -575,7 +1374,7 @@ int bpf_prog_new_fd(struct bpf_prog *pro + O_RDWR | O_CLOEXEC); + } + +-static struct bpf_prog *__bpf_prog_get(struct fd f) ++static struct bpf_prog *____bpf_prog_get(struct fd f) + { + if (!f.file) + return ERR_PTR(-EBADF); +@@ -587,38 +1386,178 @@ static struct bpf_prog *__bpf_prog_get(s + return f.file->private_data; + } + ++struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) ++{ ++ if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) { ++ atomic_sub(i, &prog->aux->refcnt); ++ return ERR_PTR(-EBUSY); ++ } ++ return prog; ++} ++EXPORT_SYMBOL_GPL(bpf_prog_add); ++ ++void bpf_prog_sub(struct bpf_prog *prog, int i) ++{ ++ /* Only to be used for undoing previous bpf_prog_add() in some ++ * error path. We still know that another entity in our call ++ * path holds a reference to the program, thus atomic_sub() can ++ * be safely used in such cases! 
++ */ ++ WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); ++} ++EXPORT_SYMBOL_GPL(bpf_prog_sub); ++ + struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) + { +- if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) { +- atomic_dec(&prog->aux->refcnt); ++ return bpf_prog_add(prog, 1); ++} ++EXPORT_SYMBOL_GPL(bpf_prog_inc); ++ ++/* prog_idr_lock should have been held */ ++struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) ++{ ++ int refold; ++ ++ refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0); ++ ++ if (refold >= BPF_MAX_REFCNT) { ++ __bpf_prog_put(prog, false); + return ERR_PTR(-EBUSY); + } ++ ++ if (!refold) ++ return ERR_PTR(-ENOENT); ++ + return prog; + } ++EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); + +-/* called by sockets/tracing/seccomp before attaching program to an event +- * pairs with bpf_prog_put() +- */ +-struct bpf_prog *bpf_prog_get(u32 ufd) ++bool bpf_prog_get_ok(struct bpf_prog *prog, ++ enum bpf_prog_type *attach_type, bool attach_drv) ++{ ++ /* not an attachment, just a refcount inc, always allow */ ++ if (!attach_type) ++ return true; ++ ++ if (prog->type != *attach_type) ++ return false; ++ if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv) ++ return false; ++ ++ return true; ++} ++ ++static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, ++ bool attach_drv) + { + struct fd f = fdget(ufd); + struct bpf_prog *prog; + +- prog = __bpf_prog_get(f); ++ prog = ____bpf_prog_get(f); + if (IS_ERR(prog)) + return prog; ++ if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { ++ prog = ERR_PTR(-EINVAL); ++ goto out; ++ } + + prog = bpf_prog_inc(prog); ++out: + fdput(f); +- + return prog; + } +-EXPORT_SYMBOL_GPL(bpf_prog_get); ++ ++struct bpf_prog *bpf_prog_get(u32 ufd) ++{ ++ return __bpf_prog_get(ufd, NULL, false); ++} ++ ++struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, ++ bool attach_drv) ++{ ++ return __bpf_prog_get(ufd, &type, attach_drv); ++} ++EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); ++ ++/* Initially all BPF programs could be loaded w/o specifying ++ * expected_attach_type. Later for some of them specifying expected_attach_type ++ * at load time became required so that program could be validated properly. ++ * Programs of types that are allowed to be loaded both w/ and w/o (for ++ * backward compatibility) expected_attach_type, should have the default attach ++ * type assigned to expected_attach_type for the latter case, so that it can be ++ * validated later at attach time. ++ * ++ * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if ++ * prog type requires it but has some attach types that have to be backward ++ * compatible. ++ */ ++static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) ++{ ++ switch (attr->prog_type) { ++ case BPF_PROG_TYPE_CGROUP_SOCK: ++ /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't ++ * exist so checking for non-zero is the way to go here. 
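Concretely, a loader built before expected_attach_type existed can leave the field zeroed and still get the historical behaviour for BPF_PROG_TYPE_CGROUP_SOCK, while newer loaders set it explicitly. A hedged sketch of such a minimal load (not part of the patch; load_cgroup_sock_prog is an invented name, the program body is just a two-instruction "return 1", and CAP_SYS_ADMIN is required for this program type):

/* Illustrative sketch: load a trivial cgroup/sock program and rely on the
 * kernel defaulting expected_attach_type to BPF_CGROUP_INET_SOCK_CREATE.
 */
#include <linux/bpf.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int load_cgroup_sock_prog(void)
{
	struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 1 },
		{ .code = BPF_JMP | BPF_EXIT },
	};
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
	attr.insns     = (uint64_t)(unsigned long)insns;
	attr.insn_cnt  = sizeof(insns) / sizeof(insns[0]);
	attr.license   = (uint64_t)(unsigned long)"GPL";
	/* expected_attach_type left 0: fixed up to BPF_CGROUP_INET_SOCK_CREATE */
	return (int)syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}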
++ */ ++ if (!attr->expected_attach_type) ++ attr->expected_attach_type = ++ BPF_CGROUP_INET_SOCK_CREATE; ++ break; ++ } ++} ++ ++static int ++bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, ++ enum bpf_attach_type expected_attach_type) ++{ ++ switch (prog_type) { ++ case BPF_PROG_TYPE_CGROUP_SOCK: ++ switch (expected_attach_type) { ++ case BPF_CGROUP_INET_SOCK_CREATE: ++ case BPF_CGROUP_INET4_POST_BIND: ++ case BPF_CGROUP_INET6_POST_BIND: ++ return 0; ++ default: ++ return -EINVAL; ++ } ++ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: ++ switch (expected_attach_type) { ++ case BPF_CGROUP_INET4_BIND: ++ case BPF_CGROUP_INET6_BIND: ++ case BPF_CGROUP_INET4_CONNECT: ++ case BPF_CGROUP_INET6_CONNECT: ++ case BPF_CGROUP_UDP4_SENDMSG: ++ case BPF_CGROUP_UDP6_SENDMSG: ++ case BPF_CGROUP_UDP4_RECVMSG: ++ case BPF_CGROUP_UDP6_RECVMSG: ++ return 0; ++ default: ++ return -EINVAL; ++ } ++ case BPF_PROG_TYPE_CGROUP_SKB: ++ switch (expected_attach_type) { ++ case BPF_CGROUP_INET_INGRESS: ++ case BPF_CGROUP_INET_EGRESS: ++ return 0; ++ default: ++ return -EINVAL; ++ } ++ case BPF_PROG_TYPE_CGROUP_SOCKOPT: ++ switch (expected_attach_type) { ++ case BPF_CGROUP_SETSOCKOPT: ++ case BPF_CGROUP_GETSOCKOPT: ++ return 0; ++ default: ++ return -EINVAL; ++ } ++ default: ++ return 0; ++ } ++} + + /* last field in 'union bpf_attr' used by this command */ +-#define BPF_PROG_LOAD_LAST_FIELD kern_version ++#define BPF_PROG_LOAD_LAST_FIELD line_info_cnt + +-static int bpf_prog_load(union bpf_attr *attr) ++static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) + { + enum bpf_prog_type type = attr->prog_type; + struct bpf_prog *prog; +@@ -629,8 +1568,19 @@ static int bpf_prog_load(union bpf_attr + if (CHECK_ATTR(BPF_PROG_LOAD)) + return -EINVAL; + ++ if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | ++ BPF_F_ANY_ALIGNMENT | ++ BPF_F_TEST_STATE_FREQ | ++ BPF_F_TEST_RND_HI32)) ++ return -EINVAL; ++ ++ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && ++ (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && ++ !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ + /* copy eBPF program license from user space */ +- if (strncpy_from_user(license, u64_to_ptr(attr->license), ++ if (strncpy_from_user(license, u64_to_user_ptr(attr->license), + sizeof(license) - 1) < 0) + return -EFAULT; + license[sizeof(license) - 1] = 0; +@@ -638,30 +1588,36 @@ static int bpf_prog_load(union bpf_attr + /* eBPF programs must be GPL compatible to use GPL-ed functions */ + is_gpl = license_is_gpl_compatible(license); + +- if (attr->insn_cnt >= BPF_MAXINSNS) +- return -EINVAL; ++ if (attr->insn_cnt == 0 || ++ attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) ++ return -E2BIG; ++ if (type != BPF_PROG_TYPE_SOCKET_FILTER && ++ type != BPF_PROG_TYPE_CGROUP_SKB && ++ !capable(CAP_SYS_ADMIN)) ++ return -EPERM; + +- if (type == BPF_PROG_TYPE_KPROBE && +- attr->kern_version != LINUX_VERSION_CODE) ++ bpf_prog_load_fixup_attach_type(attr); ++ if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type)) + return -EINVAL; + +- if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN)) +- return -EPERM; +- + /* plain bpf_prog allocation */ + prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); + if (!prog) + return -ENOMEM; + ++ prog->expected_attach_type = attr->expected_attach_type; ++ ++ prog->aux->offload_requested = !!attr->prog_ifindex; ++ + err = bpf_prog_charge_memlock(prog); + if (err) +- goto free_prog_nouncharge; ++ goto free_prog_sec; + + prog->len = attr->insn_cnt; + + err = -EFAULT; +- if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), +- prog->len * sizeof(struct bpf_insn)) != 0) ++ if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), ++ bpf_prog_insn_size(prog)) != 0) + goto free_prog; + + prog->orig_prog = NULL; +@@ -675,91 +1631,720 @@ static int bpf_prog_load(union bpf_attr + if (err < 0) + goto free_prog; + ++ prog->aux->load_time = ktime_get_boot_ns(); ++ err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); ++ if (err) ++ goto free_prog; ++ + /* run eBPF verifier */ +- err = bpf_check(&prog, attr); ++ err = bpf_check(&prog, attr, uattr); + if (err < 0) + goto free_used_maps; + +- /* fixup BPF_CALL->imm field */ +- fixup_bpf_calls(prog); +- +- /* eBPF program is ready to be JITed */ +- err = bpf_prog_select_runtime(prog); ++ prog = bpf_prog_select_runtime(prog, &err); + if (err < 0) + goto free_used_maps; + +- err = bpf_prog_new_fd(prog); +- if (err < 0) +- /* failed to allocate fd */ ++ err = bpf_prog_alloc_id(prog); ++ if (err) + goto free_used_maps; + ++ /* Upon success of bpf_prog_alloc_id(), the BPF prog is ++ * effectively publicly exposed. However, retrieving via ++ * bpf_prog_get_fd_by_id() will take another reference, ++ * therefore it cannot be gone underneath us. ++ * ++ * Only for the time /after/ successful bpf_prog_new_fd() ++ * and before returning to userspace, we might just hold ++ * one reference and any parallel close on that fd could ++ * rip everything out. Hence, below notifications must ++ * happen before bpf_prog_new_fd(). ++ * ++ * Also, any failure handling from this point onwards must ++ * be using bpf_prog_put() given the program is exposed. ++ */ ++ bpf_prog_kallsyms_add(prog); ++ ++ err = bpf_prog_new_fd(prog); ++ if (err < 0) ++ bpf_prog_put(prog); + return err; + + free_used_maps: +- free_used_maps(prog->aux); ++ /* In case we have subprogs, we need to wait for a grace ++ * period before we can tear down JIT memory since symbols ++ * are already exposed under kallsyms. 
++ */ ++ __bpf_prog_put_noref(prog, prog->aux->func_cnt); ++ return err; + free_prog: + bpf_prog_uncharge_memlock(prog); +-free_prog_nouncharge: ++free_prog_sec: + bpf_prog_free(prog); + return err; + } + +-#define BPF_OBJ_LAST_FIELD bpf_fd ++#define BPF_OBJ_LAST_FIELD file_flags + + static int bpf_obj_pin(const union bpf_attr *attr) + { +- if (CHECK_ATTR(BPF_OBJ)) ++ if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) + return -EINVAL; + +- return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname)); ++ return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); + } + + static int bpf_obj_get(const union bpf_attr *attr) + { +- if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) ++ if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || ++ attr->file_flags & ~BPF_OBJ_FLAG_MASK) + return -EINVAL; + +- return bpf_obj_get_user(u64_to_ptr(attr->pathname)); ++ return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), ++ attr->file_flags); + } + +-SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) ++ ++#define BPF_PROG_ATTACH_LAST_FIELD attach_flags ++ ++#define BPF_F_ATTACH_MASK \ ++ (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) ++ ++ ++#define BPF_PROG_DETACH_LAST_FIELD attach_type ++ ++ ++#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt ++ ++ ++#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out ++ ++static int bpf_prog_test_run(const union bpf_attr *attr, ++ union bpf_attr __user *uattr) + { +- union bpf_attr attr = {}; +- int err; ++ struct bpf_prog *prog; ++ int ret = -ENOTSUPP; + +- if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) ++ if (!capable(CAP_SYS_ADMIN)) + return -EPERM; ++ if (CHECK_ATTR(BPF_PROG_TEST_RUN)) ++ return -EINVAL; ++ ++ if ((attr->test.ctx_size_in && !attr->test.ctx_in) || ++ (!attr->test.ctx_size_in && attr->test.ctx_in)) ++ return -EINVAL; ++ ++ if ((attr->test.ctx_size_out && !attr->test.ctx_out) || ++ (!attr->test.ctx_size_out && attr->test.ctx_out)) ++ return -EINVAL; ++ ++ prog = bpf_prog_get(attr->test.prog_fd); ++ if (IS_ERR(prog)) ++ return PTR_ERR(prog); ++ ++ if (prog->aux->ops->test_run) ++ ret = prog->aux->ops->test_run(prog, attr, uattr); ++ ++ bpf_prog_put(prog); ++ return ret; ++} ++ ++#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id ++ ++static int bpf_obj_get_next_id(const union bpf_attr *attr, ++ union bpf_attr __user *uattr, ++ struct idr *idr, ++ spinlock_t *lock) ++{ ++ u32 next_id = attr->start_id; ++ int err = 0; ++ ++ if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) ++ return -EINVAL; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ next_id++; ++ spin_lock_bh(lock); ++ if (!idr_get_next(idr, &next_id)) ++ err = -ENOENT; ++ spin_unlock_bh(lock); ++ ++ if (!err) ++ err = put_user(next_id, &uattr->next_id); ++ ++ return err; ++} ++ ++#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id ++ ++static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) ++{ ++ struct bpf_prog *prog; ++ u32 id = attr->prog_id; ++ int fd; ++ ++ if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) ++ return -EINVAL; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ spin_lock_bh(&prog_idr_lock); ++ prog = idr_find(&prog_idr, id); ++ if (prog) ++ prog = bpf_prog_inc_not_zero(prog); ++ else ++ prog = ERR_PTR(-ENOENT); ++ spin_unlock_bh(&prog_idr_lock); ++ ++ if (IS_ERR(prog)) ++ return PTR_ERR(prog); ++ ++ fd = bpf_prog_new_fd(prog); ++ if (fd < 0) ++ bpf_prog_put(prog); ++ ++ return fd; ++} ++ ++#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags ++ ++static int bpf_map_get_fd_by_id(const union bpf_attr *attr) ++{ ++ 
struct bpf_map *map; ++ u32 id = attr->map_id; ++ int f_flags; ++ int fd; ++ ++ if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || ++ attr->open_flags & ~BPF_OBJ_FLAG_MASK) ++ return -EINVAL; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ f_flags = bpf_get_file_flag(attr->open_flags); ++ if (f_flags < 0) ++ return f_flags; ++ ++ spin_lock_bh(&map_idr_lock); ++ map = idr_find(&map_idr, id); ++ if (map) ++ map = __bpf_map_inc_not_zero(map, true); ++ else ++ map = ERR_PTR(-ENOENT); ++ spin_unlock_bh(&map_idr_lock); ++ ++ if (IS_ERR(map)) ++ return PTR_ERR(map); + +- if (!access_ok(VERIFY_READ, uattr, 1)) ++ fd = bpf_map_new_fd(map, f_flags); ++ if (fd < 0) ++ bpf_map_put_with_uref(map); ++ ++ return fd; ++} ++ ++static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, ++ unsigned long addr, u32 *off, ++ u32 *type) ++{ ++ const struct bpf_map *map; ++ int i; ++ ++ for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { ++ map = prog->aux->used_maps[i]; ++ if (map == (void *)addr) { ++ *type = BPF_PSEUDO_MAP_FD; ++ return map; ++ } ++ if (!map->ops->map_direct_value_meta) ++ continue; ++ if (!map->ops->map_direct_value_meta(map, addr, off)) { ++ *type = BPF_PSEUDO_MAP_VALUE; ++ return map; ++ } ++ } ++ ++ return NULL; ++} ++ ++static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, ++ const struct cred *f_cred) ++{ ++ const struct bpf_map *map; ++ struct bpf_insn *insns; ++ u32 off, type; ++ u64 imm; ++ int i; ++ ++ insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), ++ GFP_USER); ++ if (!insns) ++ return insns; ++ ++ for (i = 0; i < prog->len; i++) { ++ if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) { ++ insns[i].code = BPF_JMP | BPF_CALL; ++ insns[i].imm = BPF_FUNC_tail_call; ++ /* fall-through */ ++ } ++ if (insns[i].code == (BPF_JMP | BPF_CALL) || ++ insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) { ++ if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) ++ insns[i].code = BPF_JMP | BPF_CALL; ++ if (!bpf_dump_raw_ok(f_cred)) ++ insns[i].imm = 0; ++ continue; ++ } ++ ++ if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW)) ++ continue; ++ ++ imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; ++ map = bpf_map_from_imm(prog, imm, &off, &type); ++ if (map) { ++ insns[i].src_reg = type; ++ insns[i].imm = map->id; ++ insns[i + 1].imm = off; ++ continue; ++ } ++ } ++ ++ return insns; ++} ++ ++static int set_info_rec_size(struct bpf_prog_info *info) ++{ ++ /* ++ * Ensure info.*_rec_size is the same as kernel expected size ++ * ++ * or ++ * ++ * Only allow zero *_rec_size if both _rec_size and _cnt are ++ * zero. In this case, the kernel will set the expected ++ * _rec_size back to the info. 
++ */ ++ ++ if ((info->nr_func_info || info->func_info_rec_size) && ++ info->func_info_rec_size != sizeof(struct bpf_func_info)) ++ return -EINVAL; ++ ++ if ((info->nr_line_info || info->line_info_rec_size) && ++ info->line_info_rec_size != sizeof(struct bpf_line_info)) ++ return -EINVAL; ++ ++ if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && ++ info->jited_line_info_rec_size != sizeof(__u64)) ++ return -EINVAL; ++ ++ info->func_info_rec_size = sizeof(struct bpf_func_info); ++ info->line_info_rec_size = sizeof(struct bpf_line_info); ++ info->jited_line_info_rec_size = sizeof(__u64); ++ ++ return 0; ++} ++ ++static int bpf_prog_get_info_by_fd(struct file *file, ++ struct bpf_prog *prog, ++ const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); ++ struct bpf_prog_info info; ++ u32 info_len = attr->info.info_len; ++ struct bpf_prog_stats stats; ++ char __user *uinsns; ++ u32 ulen; ++ int err; ++ ++ err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); ++ if (err) ++ return err; ++ info_len = min_t(u32, sizeof(info), info_len); ++ ++ memset(&info, 0, sizeof(info)); ++ if (copy_from_user(&info, uinfo, info_len)) + return -EFAULT; + +- if (size > PAGE_SIZE) /* silly large */ +- return -E2BIG; ++ info.type = prog->type; ++ info.id = prog->aux->id; ++ info.load_time = prog->aux->load_time; ++ info.created_by_uid = from_kuid_munged(current_user_ns(), ++ prog->aux->user->uid); ++ info.gpl_compatible = prog->gpl_compatible; ++ ++ memcpy(info.tag, prog->tag, sizeof(prog->tag)); ++ memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); ++ ++ ulen = info.nr_map_ids; ++ info.nr_map_ids = prog->aux->used_map_cnt; ++ ulen = min_t(u32, info.nr_map_ids, ulen); ++ if (ulen) { ++ u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); ++ u32 i; ++ ++ for (i = 0; i < ulen; i++) ++ if (put_user(prog->aux->used_maps[i]->id, ++ &user_map_ids[i])) ++ return -EFAULT; ++ } ++ ++ err = set_info_rec_size(&info); ++ if (err) ++ return err; ++ ++ bpf_prog_get_stats(prog, &stats); ++ info.run_time_ns = stats.nsecs; ++ info.run_cnt = stats.cnt; ++ ++ if (!capable(CAP_SYS_ADMIN)) { ++ info.jited_prog_len = 0; ++ info.xlated_prog_len = 0; ++ info.nr_jited_ksyms = 0; ++ info.nr_jited_func_lens = 0; ++ info.nr_func_info = 0; ++ info.nr_line_info = 0; ++ info.nr_jited_line_info = 0; ++ goto done; ++ } ++ ++ ulen = info.xlated_prog_len; ++ info.xlated_prog_len = bpf_prog_insn_size(prog); ++ if (info.xlated_prog_len && ulen) { ++ struct bpf_insn *insns_sanitized; ++ bool fault; ++ ++ if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { ++ info.xlated_prog_insns = 0; ++ goto done; ++ } ++ insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); ++ if (!insns_sanitized) ++ return -ENOMEM; ++ uinsns = u64_to_user_ptr(info.xlated_prog_insns); ++ ulen = min_t(u32, info.xlated_prog_len, ulen); ++ fault = copy_to_user(uinsns, insns_sanitized, ulen); ++ kfree(insns_sanitized); ++ if (fault) ++ return -EFAULT; ++ } ++ ++ /* NOTE: the following code is supposed to be skipped for offload. ++ * bpf_prog_offload_info_fill() is the place to fill similar fields ++ * for offload. 
++ */ ++ ulen = info.jited_prog_len; ++ if (prog->aux->func_cnt) { ++ u32 i; ++ ++ info.jited_prog_len = 0; ++ for (i = 0; i < prog->aux->func_cnt; i++) ++ info.jited_prog_len += prog->aux->func[i]->jited_len; ++ } else { ++ info.jited_prog_len = prog->jited_len; ++ } ++ ++ if (info.jited_prog_len && ulen) { ++ if (bpf_dump_raw_ok(file->f_cred)) { ++ uinsns = u64_to_user_ptr(info.jited_prog_insns); ++ ulen = min_t(u32, info.jited_prog_len, ulen); ++ ++ /* for multi-function programs, copy the JITed ++ * instructions for all the functions ++ */ ++ if (prog->aux->func_cnt) { ++ u32 len, free, i; ++ u8 *img; ++ ++ free = ulen; ++ for (i = 0; i < prog->aux->func_cnt; i++) { ++ len = prog->aux->func[i]->jited_len; ++ len = min_t(u32, len, free); ++ img = (u8 *) prog->aux->func[i]->bpf_func; ++ if (copy_to_user(uinsns, img, len)) ++ return -EFAULT; ++ uinsns += len; ++ free -= len; ++ if (!free) ++ break; ++ } ++ } else { ++ if (copy_to_user(uinsns, prog->bpf_func, ulen)) ++ return -EFAULT; ++ } ++ } else { ++ info.jited_prog_insns = 0; ++ } ++ } ++ ++ ulen = info.nr_jited_ksyms; ++ info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; ++ if (ulen) { ++ if (bpf_dump_raw_ok(file->f_cred)) { ++ unsigned long ksym_addr; ++ u64 __user *user_ksyms; ++ u32 i; ++ ++ /* copy the address of the kernel symbol ++ * corresponding to each function ++ */ ++ ulen = min_t(u32, info.nr_jited_ksyms, ulen); ++ user_ksyms = u64_to_user_ptr(info.jited_ksyms); ++ if (prog->aux->func_cnt) { ++ for (i = 0; i < ulen; i++) { ++ ksym_addr = (unsigned long) ++ prog->aux->func[i]->bpf_func; ++ if (put_user((u64) ksym_addr, ++ &user_ksyms[i])) ++ return -EFAULT; ++ } ++ } else { ++ ksym_addr = (unsigned long) prog->bpf_func; ++ if (put_user((u64) ksym_addr, &user_ksyms[0])) ++ return -EFAULT; ++ } ++ } else { ++ info.jited_ksyms = 0; ++ } ++ } ++ ++ ulen = info.nr_jited_func_lens; ++ info.nr_jited_func_lens = prog->aux->func_cnt ? : 1; ++ if (ulen) { ++ if (bpf_dump_raw_ok(file->f_cred)) { ++ u32 __user *user_lens; ++ u32 func_len, i; ++ ++ /* copy the JITed image lengths for each function */ ++ ulen = min_t(u32, info.nr_jited_func_lens, ulen); ++ user_lens = u64_to_user_ptr(info.jited_func_lens); ++ if (prog->aux->func_cnt) { ++ for (i = 0; i < ulen; i++) { ++ func_len = ++ prog->aux->func[i]->jited_len; ++ if (put_user(func_len, &user_lens[i])) ++ return -EFAULT; ++ } ++ } else { ++ func_len = prog->jited_len; ++ if (put_user(func_len, &user_lens[0])) ++ return -EFAULT; ++ } ++ } else { ++ info.jited_func_lens = 0; ++ } ++ } ++ ++ if (prog->aux->btf) ++ info.btf_id = btf_id(prog->aux->btf); ++ ++ ulen = info.nr_func_info; ++ info.nr_func_info = prog->aux->func_info_cnt; ++ if (info.nr_func_info && ulen) { ++ char __user *user_finfo; ++ ++ user_finfo = u64_to_user_ptr(info.func_info); ++ ulen = min_t(u32, info.nr_func_info, ulen); ++ if (copy_to_user(user_finfo, prog->aux->func_info, ++ info.func_info_rec_size * ulen)) ++ return -EFAULT; ++ } ++ ++ ulen = info.nr_line_info; ++ info.nr_line_info = prog->aux->nr_linfo; ++ if (info.nr_line_info && ulen) { ++ __u8 __user *user_linfo; ++ ++ user_linfo = u64_to_user_ptr(info.line_info); ++ ulen = min_t(u32, info.nr_line_info, ulen); ++ if (copy_to_user(user_linfo, prog->aux->linfo, ++ info.line_info_rec_size * ulen)) ++ return -EFAULT; ++ } + +- /* If we're handed a bigger struct than we know of, +- * ensure all the unknown bits are 0 - i.e. new +- * user-space does not rely on any kernel feature +- * extensions we dont know about yet. 
+- */ +- if (size > sizeof(attr)) { +- unsigned char __user *addr; +- unsigned char __user *end; +- unsigned char val; +- +- addr = (void __user *)uattr + sizeof(attr); +- end = (void __user *)uattr + size; +- +- for (; addr < end; addr++) { +- err = get_user(val, addr); +- if (err) +- return err; +- if (val) +- return -E2BIG; ++ ulen = info.nr_jited_line_info; ++ if (prog->aux->jited_linfo) ++ info.nr_jited_line_info = prog->aux->nr_linfo; ++ else ++ info.nr_jited_line_info = 0; ++ if (info.nr_jited_line_info && ulen) { ++ if (bpf_dump_raw_ok(file->f_cred)) { ++ __u64 __user *user_linfo; ++ u32 i; ++ ++ user_linfo = u64_to_user_ptr(info.jited_line_info); ++ ulen = min_t(u32, info.nr_jited_line_info, ulen); ++ for (i = 0; i < ulen; i++) { ++ if (put_user((__u64)(long)prog->aux->jited_linfo[i], ++ &user_linfo[i])) ++ return -EFAULT; ++ } ++ } else { ++ info.jited_line_info = 0; + } +- size = sizeof(attr); + } + ++ ulen = info.nr_prog_tags; ++ info.nr_prog_tags = prog->aux->func_cnt ? : 1; ++ if (ulen) { ++ __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; ++ u32 i; ++ ++ user_prog_tags = u64_to_user_ptr(info.prog_tags); ++ ulen = min_t(u32, info.nr_prog_tags, ulen); ++ if (prog->aux->func_cnt) { ++ for (i = 0; i < ulen; i++) { ++ if (copy_to_user(user_prog_tags[i], ++ prog->aux->func[i]->tag, ++ BPF_TAG_SIZE)) ++ return -EFAULT; ++ } ++ } else { ++ if (copy_to_user(user_prog_tags[0], ++ prog->tag, BPF_TAG_SIZE)) ++ return -EFAULT; ++ } ++ } ++ ++done: ++ if (copy_to_user(uinfo, &info, info_len) || ++ put_user(info_len, &uattr->info.info_len)) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++static int bpf_map_get_info_by_fd(struct file *file, ++ struct bpf_map *map, ++ const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); ++ struct bpf_map_info info; ++ u32 info_len = attr->info.info_len; ++ int err; ++ ++ err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); ++ if (err) ++ return err; ++ info_len = min_t(u32, sizeof(info), info_len); ++ ++ memset(&info, 0, sizeof(info)); ++ info.type = map->map_type; ++ info.id = map->id; ++ info.key_size = map->key_size; ++ info.value_size = map->value_size; ++ info.max_entries = map->max_entries; ++ info.map_flags = map->map_flags; ++ memcpy(info.name, map->name, sizeof(map->name)); ++ ++ if (map->btf) { ++ info.btf_id = btf_id(map->btf); ++ info.btf_key_type_id = map->btf_key_type_id; ++ info.btf_value_type_id = map->btf_value_type_id; ++ } ++ ++ if (copy_to_user(uinfo, &info, info_len) || ++ put_user(info_len, &uattr->info.info_len)) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++static int bpf_btf_get_info_by_fd(struct file *file, ++ struct btf *btf, ++ const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); ++ u32 info_len = attr->info.info_len; ++ int err; ++ ++ err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len); ++ if (err) ++ return err; ++ ++ return btf_get_info_by_fd(btf, attr, uattr); ++} ++ ++#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info ++ ++static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ int ufd = attr->info.bpf_fd; ++ struct fd f; ++ int err; ++ ++ if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) ++ return -EINVAL; ++ ++ f = fdget(ufd); ++ if (!f.file) ++ return -EBADFD; ++ ++ if (f.file->f_op == &bpf_prog_fops) ++ err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, ++ uattr); ++ else if (f.file->f_op == 
&bpf_map_fops) ++ err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, ++ uattr); ++ else if (f.file->f_op == &btf_fops) ++ err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); ++ else ++ err = -EINVAL; ++ ++ fdput(f); ++ return err; ++} ++ ++#define BPF_BTF_LOAD_LAST_FIELD btf_log_level ++ ++static int bpf_btf_load(const union bpf_attr *attr) ++{ ++ if (CHECK_ATTR(BPF_BTF_LOAD)) ++ return -EINVAL; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ return btf_new_fd(attr); ++} ++ ++#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id ++ ++static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) ++{ ++ if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) ++ return -EINVAL; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ return btf_get_fd_by_id(attr->btf_id); ++} ++ ++ ++#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr ++ ++SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) ++{ ++ union bpf_attr attr; ++ int err; ++ ++ if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); ++ if (err) ++ return err; ++ size = min_t(u32, size, sizeof(attr)); ++ + /* copy attributes from user space, may be less than sizeof(bpf_attr) */ ++ memset(&attr, 0, sizeof(attr)); + if (copy_from_user(&attr, uattr, size) != 0) + return -EFAULT; + +@@ -779,8 +2364,11 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf + case BPF_MAP_GET_NEXT_KEY: + err = map_get_next_key(&attr); + break; ++ case BPF_MAP_FREEZE: ++ err = map_freeze(&attr); ++ break; + case BPF_PROG_LOAD: +- err = bpf_prog_load(&attr); ++ err = bpf_prog_load(&attr, uattr); + break; + case BPF_OBJ_PIN: + err = bpf_obj_pin(&attr); +@@ -788,6 +2376,39 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf + case BPF_OBJ_GET: + err = bpf_obj_get(&attr); + break; ++ case BPF_PROG_TEST_RUN: ++ err = bpf_prog_test_run(&attr, uattr); ++ break; ++ case BPF_PROG_GET_NEXT_ID: ++ err = bpf_obj_get_next_id(&attr, uattr, ++ &prog_idr, &prog_idr_lock); ++ break; ++ case BPF_MAP_GET_NEXT_ID: ++ err = bpf_obj_get_next_id(&attr, uattr, ++ &map_idr, &map_idr_lock); ++ break; ++ case BPF_BTF_GET_NEXT_ID: ++ err = bpf_obj_get_next_id(&attr, uattr, ++ &btf_idr, &btf_idr_lock); ++ break; ++ case BPF_PROG_GET_FD_BY_ID: ++ err = bpf_prog_get_fd_by_id(&attr); ++ break; ++ case BPF_MAP_GET_FD_BY_ID: ++ err = bpf_map_get_fd_by_id(&attr); ++ break; ++ case BPF_OBJ_GET_INFO_BY_FD: ++ err = bpf_obj_get_info_by_fd(&attr, uattr); ++ break; ++ case BPF_BTF_LOAD: ++ err = bpf_btf_load(&attr); ++ break; ++ case BPF_BTF_GET_FD_BY_ID: ++ err = bpf_btf_get_fd_by_id(&attr); ++ break; ++ case BPF_MAP_LOOKUP_AND_DELETE_ELEM: ++ err = map_lookup_and_delete_elem(&attr); ++ break; + default: + err = -EINVAL; + break; +--- /dev/null ++++ b/kernel/bpf/sysfs_btf.c +@@ -0,0 +1,45 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Provide kernel BTF information for introspection and use by eBPF tools. 
++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* See scripts/link-vmlinux.sh, gen_btf() func for details */ ++extern char __weak __start_BTF[]; ++extern char __weak __stop_BTF[]; ++ ++static ssize_t ++btf_vmlinux_read(struct file *file, struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t len) ++{ ++ memcpy(buf, __start_BTF + off, len); ++ return len; ++} ++ ++static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = { ++ .attr = { .name = "vmlinux", .mode = 0444, }, ++ .read = btf_vmlinux_read, ++}; ++ ++static struct kobject *btf_kobj; ++ ++static int __init btf_vmlinux_init(void) ++{ ++ bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; ++ ++ if (!__start_BTF || bin_attr_btf_vmlinux.size == 0) ++ return 0; ++ ++ btf_kobj = kobject_create_and_add("btf", kernel_kobj); ++ if (!btf_kobj) ++ return -ENOMEM; ++ ++ return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux); ++} ++ ++subsys_initcall(btf_vmlinux_init); +--- /dev/null ++++ b/kernel/bpf/tnum.c +@@ -0,0 +1,196 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* tnum: tracked (or tristate) numbers ++ * ++ * A tnum tracks knowledge about the bits of a value. Each bit can be either ++ * known (0 or 1), or unknown (x). Arithmetic operations on tnums will ++ * propagate the unknown bits such that the tnum result represents all the ++ * possible results for possible values of the operands. ++ */ ++#include ++#include ++ ++#define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m} ++/* A completely unknown value */ ++const struct tnum tnum_unknown = { .value = 0, .mask = -1 }; ++ ++struct tnum tnum_const(u64 value) ++{ ++ return TNUM(value, 0); ++} ++ ++struct tnum tnum_range(u64 min, u64 max) ++{ ++ u64 chi = min ^ max, delta; ++ u8 bits = fls64(chi); ++ ++ /* special case, needed because 1ULL << 64 is undefined */ ++ if (bits > 63) ++ return tnum_unknown; ++ /* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7. ++ * if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return ++ * constant min (since min == max). ++ */ ++ delta = (1ULL << bits) - 1; ++ return TNUM(min & ~delta, delta); ++} ++ ++struct tnum tnum_lshift(struct tnum a, u8 shift) ++{ ++ return TNUM(a.value << shift, a.mask << shift); ++} ++ ++struct tnum tnum_rshift(struct tnum a, u8 shift) ++{ ++ return TNUM(a.value >> shift, a.mask >> shift); ++} ++ ++struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness) ++{ ++ /* if a.value is negative, arithmetic shifting by minimum shift ++ * will have larger negative offset compared to more shifting. ++ * If a.value is nonnegative, arithmetic shifting by minimum shift ++ * will have larger positive offset compare to more shifting. 
++ */ ++ if (insn_bitness == 32) ++ return TNUM((u32)(((s32)a.value) >> min_shift), ++ (u32)(((s32)a.mask) >> min_shift)); ++ else ++ return TNUM((s64)a.value >> min_shift, ++ (s64)a.mask >> min_shift); ++} ++ ++struct tnum tnum_add(struct tnum a, struct tnum b) ++{ ++ u64 sm, sv, sigma, chi, mu; ++ ++ sm = a.mask + b.mask; ++ sv = a.value + b.value; ++ sigma = sm + sv; ++ chi = sigma ^ sv; ++ mu = chi | a.mask | b.mask; ++ return TNUM(sv & ~mu, mu); ++} ++ ++struct tnum tnum_sub(struct tnum a, struct tnum b) ++{ ++ u64 dv, alpha, beta, chi, mu; ++ ++ dv = a.value - b.value; ++ alpha = dv + a.mask; ++ beta = dv - b.mask; ++ chi = alpha ^ beta; ++ mu = chi | a.mask | b.mask; ++ return TNUM(dv & ~mu, mu); ++} ++ ++struct tnum tnum_and(struct tnum a, struct tnum b) ++{ ++ u64 alpha, beta, v; ++ ++ alpha = a.value | a.mask; ++ beta = b.value | b.mask; ++ v = a.value & b.value; ++ return TNUM(v, alpha & beta & ~v); ++} ++ ++struct tnum tnum_or(struct tnum a, struct tnum b) ++{ ++ u64 v, mu; ++ ++ v = a.value | b.value; ++ mu = a.mask | b.mask; ++ return TNUM(v, mu & ~v); ++} ++ ++struct tnum tnum_xor(struct tnum a, struct tnum b) ++{ ++ u64 v, mu; ++ ++ v = a.value ^ b.value; ++ mu = a.mask | b.mask; ++ return TNUM(v & ~mu, mu); ++} ++ ++/* half-multiply add: acc += (unknown * mask * value). ++ * An intermediate step in the multiply algorithm. ++ */ ++static struct tnum hma(struct tnum acc, u64 value, u64 mask) ++{ ++ while (mask) { ++ if (mask & 1) ++ acc = tnum_add(acc, TNUM(0, value)); ++ mask >>= 1; ++ value <<= 1; ++ } ++ return acc; ++} ++ ++struct tnum tnum_mul(struct tnum a, struct tnum b) ++{ ++ struct tnum acc; ++ u64 pi; ++ ++ pi = a.value * b.value; ++ acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value); ++ return hma(acc, b.mask, a.value); ++} ++ ++/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has ++ * a 'known 0' - this will return a 'known 1' for that bit. ++ */ ++struct tnum tnum_intersect(struct tnum a, struct tnum b) ++{ ++ u64 v, mu; ++ ++ v = a.value | b.value; ++ mu = a.mask & b.mask; ++ return TNUM(v & ~mu, mu); ++} ++ ++struct tnum tnum_cast(struct tnum a, u8 size) ++{ ++ a.value &= (1ULL << (size * 8)) - 1; ++ a.mask &= (1ULL << (size * 8)) - 1; ++ return a; ++} ++ ++bool tnum_is_aligned(struct tnum a, u64 size) ++{ ++ if (!size) ++ return true; ++ return !((a.value | a.mask) & (size - 1)); ++} ++ ++bool tnum_in(struct tnum a, struct tnum b) ++{ ++ if (b.mask & ~a.mask) ++ return false; ++ b.value &= ~a.mask; ++ return a.value == b.value; ++} ++ ++int tnum_strn(char *str, size_t size, struct tnum a) ++{ ++ return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask); ++} ++EXPORT_SYMBOL_GPL(tnum_strn); ++ ++int tnum_sbin(char *str, size_t size, struct tnum a) ++{ ++ size_t n; ++ ++ for (n = 64; n; n--) { ++ if (n < size) { ++ if (a.mask & 1) ++ str[n - 1] = 'x'; ++ else if (a.value & 1) ++ str[n - 1] = '1'; ++ else ++ str[n - 1] = '0'; ++ } ++ a.mask >>= 1; ++ a.value >>= 1; ++ } ++ str[min(size - 1, (size_t)64)] = 0; ++ return 64; ++} +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1,22 +1,36 @@ ++// SPDX-License-Identifier: GPL-2.0-only + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of version 2 of the GNU General Public +- * License as published by the Free Software Foundation. 
+- * +- * This program is distributed in the hope that it will be useful, but +- * WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. ++ * Copyright (c) 2016 Facebook ++ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io + */ ++#include + #include + #include + #include + #include ++#include ++#include + #include + #include + #include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "disasm.h" ++ ++static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { ++#define BPF_PROG_TYPE(_id, _name) \ ++ [_id] = & _name ## _verifier_ops, ++#define BPF_MAP_TYPE(_id, _ops) ++#include ++#undef BPF_PROG_TYPE ++#undef BPF_MAP_TYPE ++}; + + /* bpf_check() is a static code analyzer that walks eBPF program + * instruction by instruction and updates register/stack state. +@@ -30,7 +44,7 @@ + * - out of bounds or malformed jumps + * The second pass is all possible path descent from the 1st insn. + * Since it's analyzing all pathes through the program, the length of the +- * analysis is limited to 32k insn, which may be hit even if total number of ++ * analysis is limited to 64k insn, which may be hit even if total number of + * insn is less then 4K, but there are too many branches that change stack/regs. + * Number of 'branches to be analyzed' is limited to 1k + * +@@ -58,13 +72,13 @@ + * (and -20 constant is saved for further stack bounds checking). + * Meaning that this reg is a pointer to stack plus known immediate constant. + * +- * Most of the time the registers have UNKNOWN_VALUE type, which ++ * Most of the time the registers have SCALAR_VALUE type, which + * means the register has some value, but it's not a valid pointer. +- * (like pointer plus pointer becomes UNKNOWN_VALUE type) ++ * (like pointer plus pointer becomes SCALAR_VALUE type) + * + * When verifier sees load or store instructions the type of base register +- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer +- * types recognized by check_mem_access() function. ++ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are ++ * four pointer types recognized by check_mem_access() function. + * + * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value' + * and the range of [ptr, ptr + map's value_size) is accessible. +@@ -123,346 +137,713 @@ + * + * After the call R0 is set to return type of the function and registers R1-R5 + * are set to NOT_INIT to indicate that they are no longer readable. ++ * ++ * The following reference types represent a potential reference to a kernel ++ * resource which, after first being allocated, must be checked and freed by ++ * the BPF program: ++ * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET ++ * ++ * When the verifier sees a helper call return a reference type, it allocates a ++ * pointer id for the reference and stores it in the current function state. ++ * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into ++ * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type ++ * passes through a NULL-check conditional. For the branch wherein the state is ++ * changed to CONST_IMM, the verifier releases the reference. ++ * ++ * For each helper function that allocates a reference, such as ++ * bpf_sk_lookup_tcp(), there is a corresponding release function, such as ++ * bpf_sk_release(). 
When a reference type passes into the release function, ++ * the verifier also releases the reference. If any unchecked or unreleased ++ * reference remains at the end of the program, the verifier rejects it. + */ + +-/* types of values stored in eBPF registers */ +-enum bpf_reg_type { +- NOT_INIT = 0, /* nothing was written into register */ +- UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ +- PTR_TO_CTX, /* reg points to bpf_context */ +- CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ +- PTR_TO_MAP_VALUE, /* reg points to map element value */ +- PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ +- FRAME_PTR, /* reg == frame_pointer */ +- PTR_TO_STACK, /* reg == frame_pointer + imm */ +- CONST_IMM, /* constant integer value */ +-}; +- +-struct reg_state { +- enum bpf_reg_type type; +- union { +- /* valid when type == CONST_IMM | PTR_TO_STACK */ +- int imm; +- +- /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | +- * PTR_TO_MAP_VALUE_OR_NULL +- */ +- struct bpf_map *map_ptr; +- }; +-}; +- +-enum bpf_stack_slot_type { +- STACK_INVALID, /* nothing was stored in this stack slot */ +- STACK_SPILL, /* register spilled into stack */ +- STACK_MISC /* BPF program wrote some data into this slot */ +-}; +- +-#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +- +-/* state of the program: +- * type of all registers and stack info +- */ +-struct verifier_state { +- struct reg_state regs[MAX_BPF_REG]; +- u8 stack_slot_type[MAX_BPF_STACK]; +- struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; +-}; +- +-/* linked list of verifier states used to prune search */ +-struct verifier_state_list { +- struct verifier_state state; +- struct verifier_state_list *next; +-}; +- + /* verifier_state + insn_idx are pushed to stack when branch is encountered */ +-struct verifier_stack_elem { ++struct bpf_verifier_stack_elem { + /* verifer state is 'st' + * before processing instruction 'insn_idx' + * and after processing instruction 'prev_insn_idx' + */ +- struct verifier_state st; ++ struct bpf_verifier_state st; + int insn_idx; + int prev_insn_idx; +- struct verifier_stack_elem *next; ++ struct bpf_verifier_stack_elem *next; + }; + +-#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ ++#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192 ++#define BPF_COMPLEXITY_LIMIT_STATES 64 + +-/* single container for all structs +- * one verifier_env per bpf_check() call +- */ +-struct verifier_env { +- struct bpf_prog *prog; /* eBPF program being verified */ +- struct verifier_stack_elem *head; /* stack of verifier states to be processed */ +- int stack_size; /* number of states to be processed */ +- struct verifier_state cur_state; /* current verifier state */ +- struct verifier_state_list **explored_states; /* search pruning optimization */ +- struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ +- u32 used_map_cnt; /* number of used maps */ +- bool allow_ptr_leaks; +-}; ++#define BPF_MAP_PTR_UNPRIV 1UL ++#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ ++ POISON_POINTER_DELTA)) ++#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) + +-/* verbose verifier prints what it's seeing +- * bpf_check() is called under lock, so no race to access these global vars +- */ +-static u32 log_level, log_size, log_len; +-static char *log_buf; ++static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) ++{ ++ return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON; ++} ++ ++static bool 
bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux) ++{ ++ return aux->map_state & BPF_MAP_PTR_UNPRIV; ++} ++ ++static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, ++ const struct bpf_map *map, bool unpriv) ++{ ++ BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV); ++ unpriv |= bpf_map_ptr_unpriv(aux); ++ aux->map_state = (unsigned long)map | ++ (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL); ++} ++ ++struct bpf_call_arg_meta { ++ struct bpf_map *map_ptr; ++ bool raw_mode; ++ bool pkt_access; ++ int regno; ++ int access_size; ++ u64 msize_max_value; ++ int ref_obj_id; ++ int func_id; ++}; + + static DEFINE_MUTEX(bpf_verifier_lock); + ++static const struct bpf_line_info * ++find_linfo(const struct bpf_verifier_env *env, u32 insn_off) ++{ ++ const struct bpf_line_info *linfo; ++ const struct bpf_prog *prog; ++ u32 i, nr_linfo; ++ ++ prog = env->prog; ++ nr_linfo = prog->aux->nr_linfo; ++ ++ if (!nr_linfo || insn_off >= prog->len) ++ return NULL; ++ ++ linfo = prog->aux->linfo; ++ for (i = 1; i < nr_linfo; i++) ++ if (insn_off < linfo[i].insn_off) ++ break; ++ ++ return &linfo[i - 1]; ++} ++ ++void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, ++ va_list args) ++{ ++ unsigned int n; ++ ++ n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); ++ ++ WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, ++ "verifier log line truncated - local buffer too short\n"); ++ ++ n = min(log->len_total - log->len_used - 1, n); ++ log->kbuf[n] = '\0'; ++ ++ if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) ++ log->len_used += n; ++ else ++ log->ubuf = NULL; ++} ++ + /* log_level controls verbosity level of eBPF verifier. +- * verbose() is used to dump the verification trace to the log, so the user +- * can figure out what's wrong with the program ++ * bpf_verifier_log_write() is used to dump the verification trace to the log, ++ * so the user can figure out what's wrong with the program + */ +-static __printf(1, 2) void verbose(const char *fmt, ...) ++__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, ++ const char *fmt, ...) + { + va_list args; + +- if (log_level == 0 || log_len >= log_size - 1) ++ if (!bpf_verifier_log_needed(&env->log)) + return; + + va_start(args, fmt); +- log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args); ++ bpf_verifier_vlog(&env->log, fmt, args); + va_end(args); + } ++EXPORT_SYMBOL_GPL(bpf_verifier_log_write); ++ ++__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) ++{ ++ struct bpf_verifier_env *env = private_data; ++ va_list args; ++ ++ if (!bpf_verifier_log_needed(&env->log)) ++ return; ++ ++ va_start(args, fmt); ++ bpf_verifier_vlog(&env->log, fmt, args); ++ va_end(args); ++} ++ ++static const char *ltrim(const char *s) ++{ ++ while (isspace(*s)) ++ s++; ++ ++ return s; ++} ++ ++__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, ++ u32 insn_off, ++ const char *prefix_fmt, ...) 
++{ ++ const struct bpf_line_info *linfo; ++ ++ if (!bpf_verifier_log_needed(&env->log)) ++ return; ++ ++ linfo = find_linfo(env, insn_off); ++ if (!linfo || linfo == env->prev_linfo) ++ return; ++ ++ if (prefix_fmt) { ++ va_list args; ++ ++ va_start(args, prefix_fmt); ++ bpf_verifier_vlog(&env->log, prefix_fmt, args); ++ va_end(args); ++ } ++ ++ verbose(env, "%s\n", ++ ltrim(btf_name_by_offset(env->prog->aux->btf, ++ linfo->line_off))); ++ ++ env->prev_linfo = linfo; ++} ++ ++static bool type_is_pkt_pointer(enum bpf_reg_type type) ++{ ++ return type == PTR_TO_PACKET || ++ type == PTR_TO_PACKET_META; ++} ++ ++static bool type_is_sk_pointer(enum bpf_reg_type type) ++{ ++ return type == PTR_TO_SOCKET || ++ type == PTR_TO_SOCK_COMMON || ++ type == PTR_TO_TCP_SOCK || ++ type == PTR_TO_XDP_SOCK; ++} ++ ++static bool reg_type_may_be_null(enum bpf_reg_type type) ++{ ++ return type == PTR_TO_MAP_VALUE_OR_NULL || ++ type == PTR_TO_SOCKET_OR_NULL || ++ type == PTR_TO_SOCK_COMMON_OR_NULL || ++ type == PTR_TO_TCP_SOCK_OR_NULL; ++} ++ ++static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) ++{ ++ return reg->type == PTR_TO_MAP_VALUE && ++ map_value_has_spin_lock(reg->map_ptr); ++} ++ ++static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) ++{ ++ return type == PTR_TO_SOCKET || ++ type == PTR_TO_SOCKET_OR_NULL || ++ type == PTR_TO_TCP_SOCK || ++ type == PTR_TO_TCP_SOCK_OR_NULL; ++} ++ ++static bool arg_type_may_be_refcounted(enum bpf_arg_type type) ++{ ++ return type == ARG_PTR_TO_SOCK_COMMON; ++} ++ ++/* Determine whether the function releases some resources allocated by another ++ * function call. The first reference type argument will be assumed to be ++ * released by release_reference(). ++ */ ++static bool is_release_function(enum bpf_func_id func_id) ++{ ++ return func_id == BPF_FUNC_sk_release; ++} ++ ++static bool is_acquire_function(enum bpf_func_id func_id) ++{ ++ return func_id == BPF_FUNC_sk_lookup_tcp || ++ func_id == BPF_FUNC_sk_lookup_udp || ++ func_id == BPF_FUNC_skc_lookup_tcp; ++} ++ ++static bool is_ptr_cast_function(enum bpf_func_id func_id) ++{ ++ return func_id == BPF_FUNC_tcp_sock || ++ func_id == BPF_FUNC_sk_fullsock; ++} + + /* string representation of 'enum bpf_reg_type' */ + static const char * const reg_type_str[] = { + [NOT_INIT] = "?", +- [UNKNOWN_VALUE] = "inv", ++ [SCALAR_VALUE] = "inv", + [PTR_TO_CTX] = "ctx", + [CONST_PTR_TO_MAP] = "map_ptr", + [PTR_TO_MAP_VALUE] = "map_value", + [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", +- [FRAME_PTR] = "fp", + [PTR_TO_STACK] = "fp", +- [CONST_IMM] = "imm", ++ [PTR_TO_PACKET] = "pkt", ++ [PTR_TO_PACKET_META] = "pkt_meta", ++ [PTR_TO_PACKET_END] = "pkt_end", ++ [PTR_TO_FLOW_KEYS] = "flow_keys", ++ [PTR_TO_SOCKET] = "sock", ++ [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", ++ [PTR_TO_SOCK_COMMON] = "sock_common", ++ [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", ++ [PTR_TO_TCP_SOCK] = "tcp_sock", ++ [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", ++ [PTR_TO_TP_BUFFER] = "tp_buffer", ++ [PTR_TO_XDP_SOCK] = "xdp_sock", + }; + +-static void print_verifier_state(struct verifier_env *env) ++static char slot_type_char[] = { ++ [STACK_INVALID] = '?', ++ [STACK_SPILL] = 'r', ++ [STACK_MISC] = 'm', ++ [STACK_ZERO] = '0', ++}; ++ ++static void print_liveness(struct bpf_verifier_env *env, ++ enum bpf_reg_liveness live) ++{ ++ if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) ++ verbose(env, "_"); ++ if (live & REG_LIVE_READ) ++ verbose(env, "r"); ++ if (live & REG_LIVE_WRITTEN) ++ 
verbose(env, "w"); ++ if (live & REG_LIVE_DONE) ++ verbose(env, "D"); ++} ++ ++static struct bpf_func_state *func(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg) + { ++ struct bpf_verifier_state *cur = env->cur_state; ++ ++ return cur->frame[reg->frameno]; ++} ++ ++static void print_verifier_state(struct bpf_verifier_env *env, ++ const struct bpf_func_state *state) ++{ ++ const struct bpf_reg_state *reg; + enum bpf_reg_type t; + int i; + ++ if (state->frameno) ++ verbose(env, " frame%d:", state->frameno); + for (i = 0; i < MAX_BPF_REG; i++) { +- t = env->cur_state.regs[i].type; ++ reg = &state->regs[i]; ++ t = reg->type; + if (t == NOT_INIT) + continue; +- verbose(" R%d=%s", i, reg_type_str[t]); +- if (t == CONST_IMM || t == PTR_TO_STACK) +- verbose("%d", env->cur_state.regs[i].imm); +- else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || +- t == PTR_TO_MAP_VALUE_OR_NULL) +- verbose("(ks=%d,vs=%d)", +- env->cur_state.regs[i].map_ptr->key_size, +- env->cur_state.regs[i].map_ptr->value_size); +- } +- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { +- if (env->cur_state.stack_slot_type[i] == STACK_SPILL) +- verbose(" fp%d=%s", -MAX_BPF_STACK + i, +- reg_type_str[env->cur_state.spilled_regs[i / BPF_REG_SIZE].type]); +- } +- verbose("\n"); +-} +- +-static const char *const bpf_class_string[] = { +- [BPF_LD] = "ld", +- [BPF_LDX] = "ldx", +- [BPF_ST] = "st", +- [BPF_STX] = "stx", +- [BPF_ALU] = "alu", +- [BPF_JMP] = "jmp", +- [BPF_RET] = "BUG", +- [BPF_ALU64] = "alu64", +-}; ++ verbose(env, " R%d", i); ++ print_liveness(env, reg->live); ++ verbose(env, "=%s", reg_type_str[t]); ++ if (t == SCALAR_VALUE && reg->precise) ++ verbose(env, "P"); ++ if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && ++ tnum_is_const(reg->var_off)) { ++ /* reg->off should be 0 for SCALAR_VALUE */ ++ verbose(env, "%lld", reg->var_off.value + reg->off); ++ } else { ++ verbose(env, "(id=%d", reg->id); ++ if (reg_type_may_be_refcounted_or_null(t)) ++ verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); ++ if (t != SCALAR_VALUE) ++ verbose(env, ",off=%d", reg->off); ++ if (type_is_pkt_pointer(t)) ++ verbose(env, ",r=%d", reg->range); ++ else if (t == CONST_PTR_TO_MAP || ++ t == PTR_TO_MAP_VALUE || ++ t == PTR_TO_MAP_VALUE_OR_NULL) ++ verbose(env, ",ks=%d,vs=%d", ++ reg->map_ptr->key_size, ++ reg->map_ptr->value_size); ++ if (tnum_is_const(reg->var_off)) { ++ /* Typically an immediate SCALAR_VALUE, but ++ * could be a pointer whose offset is too big ++ * for reg->off ++ */ ++ verbose(env, ",imm=%llx", reg->var_off.value); ++ } else { ++ if (reg->smin_value != reg->umin_value && ++ reg->smin_value != S64_MIN) ++ verbose(env, ",smin_value=%lld", ++ (long long)reg->smin_value); ++ if (reg->smax_value != reg->umax_value && ++ reg->smax_value != S64_MAX) ++ verbose(env, ",smax_value=%lld", ++ (long long)reg->smax_value); ++ if (reg->umin_value != 0) ++ verbose(env, ",umin_value=%llu", ++ (unsigned long long)reg->umin_value); ++ if (reg->umax_value != U64_MAX) ++ verbose(env, ",umax_value=%llu", ++ (unsigned long long)reg->umax_value); ++ if (!tnum_is_unknown(reg->var_off)) { ++ char tn_buf[48]; + +-static const char *const bpf_alu_string[16] = { +- [BPF_ADD >> 4] = "+=", +- [BPF_SUB >> 4] = "-=", +- [BPF_MUL >> 4] = "*=", +- [BPF_DIV >> 4] = "/=", +- [BPF_OR >> 4] = "|=", +- [BPF_AND >> 4] = "&=", +- [BPF_LSH >> 4] = "<<=", +- [BPF_RSH >> 4] = ">>=", +- [BPF_NEG >> 4] = "neg", +- [BPF_MOD >> 4] = "%=", +- [BPF_XOR >> 4] = "^=", +- [BPF_MOV >> 4] = "=", +- [BPF_ARSH >> 4] = "s>>=", +- [BPF_END >> 4] = "endian", 
+-}; ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, ",var_off=%s", tn_buf); ++ } ++ } ++ verbose(env, ")"); ++ } ++ } ++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { ++ char types_buf[BPF_REG_SIZE + 1]; ++ bool valid = false; ++ int j; ++ ++ for (j = 0; j < BPF_REG_SIZE; j++) { ++ if (state->stack[i].slot_type[j] != STACK_INVALID) ++ valid = true; ++ types_buf[j] = slot_type_char[ ++ state->stack[i].slot_type[j]]; ++ } ++ types_buf[BPF_REG_SIZE] = 0; ++ if (!valid) ++ continue; ++ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); ++ print_liveness(env, state->stack[i].spilled_ptr.live); ++ if (state->stack[i].slot_type[0] == STACK_SPILL) { ++ reg = &state->stack[i].spilled_ptr; ++ t = reg->type; ++ verbose(env, "=%s", reg_type_str[t]); ++ if (t == SCALAR_VALUE && reg->precise) ++ verbose(env, "P"); ++ if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) ++ verbose(env, "%lld", reg->var_off.value + reg->off); ++ } else { ++ verbose(env, "=%s", types_buf); ++ } ++ } ++ if (state->acquired_refs && state->refs[0].id) { ++ verbose(env, " refs=%d", state->refs[0].id); ++ for (i = 1; i < state->acquired_refs; i++) ++ if (state->refs[i].id) ++ verbose(env, ",%d", state->refs[i].id); ++ } ++ verbose(env, "\n"); ++} + +-static const char *const bpf_ldst_string[] = { +- [BPF_W >> 3] = "u32", +- [BPF_H >> 3] = "u16", +- [BPF_B >> 3] = "u8", +- [BPF_DW >> 3] = "u64", +-}; ++#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ ++static int copy_##NAME##_state(struct bpf_func_state *dst, \ ++ const struct bpf_func_state *src) \ ++{ \ ++ if (!src->FIELD) \ ++ return 0; \ ++ if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ ++ /* internal bug, make state invalid to reject the program */ \ ++ memset(dst, 0, sizeof(*dst)); \ ++ return -EFAULT; \ ++ } \ ++ memcpy(dst->FIELD, src->FIELD, \ ++ sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ ++ return 0; \ ++} ++/* copy_reference_state() */ ++COPY_STATE_FN(reference, acquired_refs, refs, 1) ++/* copy_stack_state() */ ++COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) ++#undef COPY_STATE_FN ++ ++#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \ ++static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ ++ bool copy_old) \ ++{ \ ++ u32 old_size = state->COUNT; \ ++ struct bpf_##NAME##_state *new_##FIELD; \ ++ int slot = size / SIZE; \ ++ \ ++ if (size <= old_size || !size) { \ ++ if (copy_old) \ ++ return 0; \ ++ state->COUNT = slot * SIZE; \ ++ if (!size && old_size) { \ ++ kfree(state->FIELD); \ ++ state->FIELD = NULL; \ ++ } \ ++ return 0; \ ++ } \ ++ new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \ ++ GFP_KERNEL); \ ++ if (!new_##FIELD) \ ++ return -ENOMEM; \ ++ if (copy_old) { \ ++ if (state->FIELD) \ ++ memcpy(new_##FIELD, state->FIELD, \ ++ sizeof(*new_##FIELD) * (old_size / SIZE)); \ ++ memset(new_##FIELD + old_size / SIZE, 0, \ ++ sizeof(*new_##FIELD) * (size - old_size) / SIZE); \ ++ } \ ++ state->COUNT = slot * SIZE; \ ++ kfree(state->FIELD); \ ++ state->FIELD = new_##FIELD; \ ++ return 0; \ ++} ++/* realloc_reference_state() */ ++REALLOC_STATE_FN(reference, acquired_refs, refs, 1) ++/* realloc_stack_state() */ ++REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) ++#undef REALLOC_STATE_FN ++ ++/* do_check() starts with zero-sized stack in struct bpf_verifier_state to ++ * make it consume minimal amount of memory. check_stack_write() access from ++ * the program calls into realloc_func_state() to grow the stack size. 
++ * Note there is a non-zero 'parent' pointer inside bpf_verifier_state ++ * which realloc_stack_state() copies over. It points to previous ++ * bpf_verifier_state which is never reallocated. ++ */ ++static int realloc_func_state(struct bpf_func_state *state, int stack_size, ++ int refs_size, bool copy_old) ++{ ++ int err = realloc_reference_state(state, refs_size, copy_old); ++ if (err) ++ return err; ++ return realloc_stack_state(state, stack_size, copy_old); ++} + +-static const char *const bpf_jmp_string[16] = { +- [BPF_JA >> 4] = "jmp", +- [BPF_JEQ >> 4] = "==", +- [BPF_JGT >> 4] = ">", +- [BPF_JGE >> 4] = ">=", +- [BPF_JSET >> 4] = "&", +- [BPF_JNE >> 4] = "!=", +- [BPF_JSGT >> 4] = "s>", +- [BPF_JSGE >> 4] = "s>=", +- [BPF_CALL >> 4] = "call", +- [BPF_EXIT >> 4] = "exit", +-}; ++/* Acquire a pointer id from the env and update the state->refs to include ++ * this new pointer reference. ++ * On success, returns a valid pointer id to associate with the register ++ * On failure, returns a negative errno. ++ */ ++static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) ++{ ++ struct bpf_func_state *state = cur_func(env); ++ int new_ofs = state->acquired_refs; ++ int id, err; + +-static void print_bpf_insn(const struct verifier_env *env, +- const struct bpf_insn *insn) ++ err = realloc_reference_state(state, state->acquired_refs + 1, true); ++ if (err) ++ return err; ++ id = ++env->id_gen; ++ state->refs[new_ofs].id = id; ++ state->refs[new_ofs].insn_idx = insn_idx; ++ ++ return id; ++} ++ ++/* release function corresponding to acquire_reference_state(). Idempotent. */ ++static int release_reference_state(struct bpf_func_state *state, int ptr_id) + { +- u8 class = BPF_CLASS(insn->code); ++ int i, last_idx; + +- if (class == BPF_ALU || class == BPF_ALU64) { +- if (BPF_SRC(insn->code) == BPF_X) +- verbose("(%02x) %sr%d %s %sr%d\n", +- insn->code, class == BPF_ALU ? "(u32) " : "", +- insn->dst_reg, +- bpf_alu_string[BPF_OP(insn->code) >> 4], +- class == BPF_ALU ? "(u32) " : "", +- insn->src_reg); +- else +- verbose("(%02x) %sr%d %s %s%d\n", +- insn->code, class == BPF_ALU ? "(u32) " : "", +- insn->dst_reg, +- bpf_alu_string[BPF_OP(insn->code) >> 4], +- class == BPF_ALU ? 
"(u32) " : "", +- insn->imm); +- } else if (class == BPF_STX) { +- if (BPF_MODE(insn->code) == BPF_MEM) +- verbose("(%02x) *(%s *)(r%d %+d) = r%d\n", +- insn->code, +- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +- insn->dst_reg, +- insn->off, insn->src_reg); +- else if (BPF_MODE(insn->code) == BPF_XADD) +- verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n", +- insn->code, +- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +- insn->dst_reg, insn->off, +- insn->src_reg); +- else +- verbose("BUG_%02x\n", insn->code); +- } else if (class == BPF_ST) { +- if (BPF_MODE(insn->code) != BPF_MEM) { +- verbose("BUG_st_%02x\n", insn->code); +- return; +- } +- verbose("(%02x) *(%s *)(r%d %+d) = %d\n", +- insn->code, +- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +- insn->dst_reg, +- insn->off, insn->imm); +- } else if (class == BPF_LDX) { +- if (BPF_MODE(insn->code) != BPF_MEM) { +- verbose("BUG_ldx_%02x\n", insn->code); +- return; ++ last_idx = state->acquired_refs - 1; ++ for (i = 0; i < state->acquired_refs; i++) { ++ if (state->refs[i].id == ptr_id) { ++ if (last_idx && i != last_idx) ++ memcpy(&state->refs[i], &state->refs[last_idx], ++ sizeof(*state->refs)); ++ memset(&state->refs[last_idx], 0, sizeof(*state->refs)); ++ state->acquired_refs--; ++ return 0; + } +- verbose("(%02x) r%d = *(%s *)(r%d %+d)\n", +- insn->code, insn->dst_reg, +- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +- insn->src_reg, insn->off); +- } else if (class == BPF_LD) { +- if (BPF_MODE(insn->code) == BPF_ABS) { +- verbose("(%02x) r0 = *(%s *)skb[%d]\n", +- insn->code, +- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +- insn->imm); +- } else if (BPF_MODE(insn->code) == BPF_IND) { +- verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n", +- insn->code, +- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +- insn->src_reg, insn->imm); +- } else if (BPF_MODE(insn->code) == BPF_IMM && +- BPF_SIZE(insn->code) == BPF_DW) { +- /* At this point, we already made sure that the second +- * part of the ldimm64 insn is accessible. 
+- */ +- u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; +- bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; ++ } ++ return -EINVAL; ++} + +- if (map_ptr && !env->allow_ptr_leaks) +- imm = 0; ++static int transfer_reference_state(struct bpf_func_state *dst, ++ struct bpf_func_state *src) ++{ ++ int err = realloc_reference_state(dst, src->acquired_refs, false); ++ if (err) ++ return err; ++ err = copy_reference_state(dst, src); ++ if (err) ++ return err; ++ return 0; ++} + +- verbose("(%02x) r%d = 0x%llx\n", insn->code, +- insn->dst_reg, (unsigned long long)imm); +- } else { +- verbose("BUG_ld_%02x\n", insn->code); +- return; +- } +- } else if (class == BPF_JMP) { +- u8 opcode = BPF_OP(insn->code); ++static void free_func_state(struct bpf_func_state *state) ++{ ++ if (!state) ++ return; ++ kfree(state->refs); ++ kfree(state->stack); ++ kfree(state); ++} + +- if (opcode == BPF_CALL) { +- verbose("(%02x) call %d\n", insn->code, insn->imm); +- } else if (insn->code == (BPF_JMP | BPF_JA)) { +- verbose("(%02x) goto pc%+d\n", +- insn->code, insn->off); +- } else if (insn->code == (BPF_JMP | BPF_EXIT)) { +- verbose("(%02x) exit\n", insn->code); +- } else if (BPF_SRC(insn->code) == BPF_X) { +- verbose("(%02x) if r%d %s r%d goto pc%+d\n", +- insn->code, insn->dst_reg, +- bpf_jmp_string[BPF_OP(insn->code) >> 4], +- insn->src_reg, insn->off); +- } else { +- verbose("(%02x) if r%d %s 0x%x goto pc%+d\n", +- insn->code, insn->dst_reg, +- bpf_jmp_string[BPF_OP(insn->code) >> 4], +- insn->imm, insn->off); ++static void clear_jmp_history(struct bpf_verifier_state *state) ++{ ++ kfree(state->jmp_history); ++ state->jmp_history = NULL; ++ state->jmp_history_cnt = 0; ++} ++ ++static void free_verifier_state(struct bpf_verifier_state *state, ++ bool free_self) ++{ ++ int i; ++ ++ for (i = 0; i <= state->curframe; i++) { ++ free_func_state(state->frame[i]); ++ state->frame[i] = NULL; ++ } ++ clear_jmp_history(state); ++ if (free_self) ++ kfree(state); ++} ++ ++/* copy verifier state from src to dst growing dst stack space ++ * when necessary to accommodate larger src stack ++ */ ++static int copy_func_state(struct bpf_func_state *dst, ++ const struct bpf_func_state *src) ++{ ++ int err; ++ ++ err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, ++ false); ++ if (err) ++ return err; ++ memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); ++ err = copy_reference_state(dst, src); ++ if (err) ++ return err; ++ return copy_stack_state(dst, src); ++} ++ ++static int copy_verifier_state(struct bpf_verifier_state *dst_state, ++ const struct bpf_verifier_state *src) ++{ ++ struct bpf_func_state *dst; ++ u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; ++ int i, err; ++ ++ if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { ++ kfree(dst_state->jmp_history); ++ dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); ++ if (!dst_state->jmp_history) ++ return -ENOMEM; ++ } ++ memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); ++ dst_state->jmp_history_cnt = src->jmp_history_cnt; ++ ++ /* if dst has more stack frames then src frame, free them */ ++ for (i = src->curframe + 1; i <= dst_state->curframe; i++) { ++ free_func_state(dst_state->frame[i]); ++ dst_state->frame[i] = NULL; ++ } ++ dst_state->speculative = src->speculative; ++ dst_state->curframe = src->curframe; ++ dst_state->active_spin_lock = src->active_spin_lock; ++ dst_state->branches = src->branches; ++ dst_state->parent = src->parent; ++ dst_state->first_insn_idx = src->first_insn_idx; ++ 
dst_state->last_insn_idx = src->last_insn_idx; ++ for (i = 0; i <= src->curframe; i++) { ++ dst = dst_state->frame[i]; ++ if (!dst) { ++ dst = kzalloc(sizeof(*dst), GFP_KERNEL); ++ if (!dst) ++ return -ENOMEM; ++ dst_state->frame[i] = dst; + } +- } else { +- verbose("(%02x) %s\n", insn->code, bpf_class_string[class]); ++ err = copy_func_state(dst, src->frame[i]); ++ if (err) ++ return err; + } ++ return 0; + } + +-static int pop_stack(struct verifier_env *env, int *prev_insn_idx) ++static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) + { +- struct verifier_stack_elem *elem; +- int insn_idx; ++ while (st) { ++ u32 br = --st->branches; ++ ++ /* WARN_ON(br > 1) technically makes sense here, ++ * but see comment in push_stack(), hence: ++ */ ++ WARN_ONCE((int)br < 0, ++ "BUG update_branch_counts:branches_to_explore=%d\n", ++ br); ++ if (br) ++ break; ++ st = st->parent; ++ } ++} ++ ++static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, ++ int *insn_idx) ++{ ++ struct bpf_verifier_state *cur = env->cur_state; ++ struct bpf_verifier_stack_elem *elem, *head = env->head; ++ int err; + + if (env->head == NULL) +- return -1; ++ return -ENOENT; + +- memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state)); +- insn_idx = env->head->insn_idx; ++ if (cur) { ++ err = copy_verifier_state(cur, &head->st); ++ if (err) ++ return err; ++ } ++ if (insn_idx) ++ *insn_idx = head->insn_idx; + if (prev_insn_idx) +- *prev_insn_idx = env->head->prev_insn_idx; +- elem = env->head->next; +- kfree(env->head); ++ *prev_insn_idx = head->prev_insn_idx; ++ elem = head->next; ++ free_verifier_state(&head->st, false); ++ kfree(head); + env->head = elem; + env->stack_size--; +- return insn_idx; ++ return 0; + } + +-static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx, +- int prev_insn_idx) ++static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, ++ int insn_idx, int prev_insn_idx, ++ bool speculative) + { +- struct verifier_stack_elem *elem; ++ struct bpf_verifier_state *cur = env->cur_state; ++ struct bpf_verifier_stack_elem *elem; ++ int err; + +- elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL); ++ elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + if (!elem) + goto err; + +- memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state)); + elem->insn_idx = insn_idx; + elem->prev_insn_idx = prev_insn_idx; + elem->next = env->head; + env->head = elem; + env->stack_size++; +- if (env->stack_size > 1024) { +- verbose("BPF program is too complex\n"); ++ err = copy_verifier_state(&elem->st, cur); ++ if (err) ++ goto err; ++ elem->st.speculative |= speculative; ++ if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { ++ verbose(env, "The sequence of %d jumps is too complex.\n", ++ env->stack_size); + goto err; + } ++ if (elem->st.parent) { ++ ++elem->st.parent->branches; ++ /* WARN_ON(branches > 2) technically makes sense here, ++ * but ++ * 1. speculative states will bump 'branches' for non-branch ++ * instructions ++ * 2. is_state_visited() heuristics may decide not to create ++ * a new state for a sequence of branches and all such current ++ * and cloned states will be pointing to a single parent state ++ * which might have large 'branches' count. 
++ */ ++ } + return &elem->st; + err: ++ free_verifier_state(env->cur_state, true); ++ env->cur_state = NULL; + /* pop all elements and return */ +- while (pop_stack(env, NULL) >= 0); ++ while (!pop_stack(env, NULL, NULL)); + return NULL; + } + +@@ -471,29 +852,225 @@ static const int caller_saved[CALLER_SAV + BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 + }; + +-static void init_reg_state(struct reg_state *regs) ++static void __mark_reg_not_init(const struct bpf_verifier_env *env, ++ struct bpf_reg_state *reg); ++ ++/* Mark the unknown part of a register (variable offset or scalar value) as ++ * known to have the value @imm. ++ */ ++static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) ++{ ++ /* Clear id, off, and union(map_ptr, range) */ ++ memset(((u8 *)reg) + sizeof(reg->type), 0, ++ offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); ++ reg->var_off = tnum_const(imm); ++ reg->smin_value = (s64)imm; ++ reg->smax_value = (s64)imm; ++ reg->umin_value = imm; ++ reg->umax_value = imm; ++} ++ ++/* Mark the 'variable offset' part of a register as zero. This should be ++ * used only on registers holding a pointer type. ++ */ ++static void __mark_reg_known_zero(struct bpf_reg_state *reg) ++{ ++ __mark_reg_known(reg, 0); ++} ++ ++static void __mark_reg_const_zero(struct bpf_reg_state *reg) ++{ ++ __mark_reg_known(reg, 0); ++ reg->type = SCALAR_VALUE; ++} ++ ++static void mark_reg_known_zero(struct bpf_verifier_env *env, ++ struct bpf_reg_state *regs, u32 regno) ++{ ++ if (WARN_ON(regno >= MAX_BPF_REG)) { ++ verbose(env, "mark_reg_known_zero(regs, %u)\n", regno); ++ /* Something bad happened, let's kill all regs */ ++ for (regno = 0; regno < MAX_BPF_REG; regno++) ++ __mark_reg_not_init(env, regs + regno); ++ return; ++ } ++ __mark_reg_known_zero(regs + regno); ++} ++ ++static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) ++{ ++ return type_is_pkt_pointer(reg->type); ++} ++ ++static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) ++{ ++ return reg_is_pkt_pointer(reg) || ++ reg->type == PTR_TO_PACKET_END; ++} ++ ++/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ ++static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, ++ enum bpf_reg_type which) ++{ ++ /* The register can already have a range from prior markings. ++ * This is fine as long as it hasn't been advanced from its ++ * origin. ++ */ ++ return reg->type == which && ++ reg->id == 0 && ++ reg->off == 0 && ++ tnum_equals_const(reg->var_off, 0); ++} ++ ++/* Attempts to improve min/max values based on var_off information */ ++static void __update_reg_bounds(struct bpf_reg_state *reg) ++{ ++ /* min signed is max(sign bit) | min(other bits) */ ++ reg->smin_value = max_t(s64, reg->smin_value, ++ reg->var_off.value | (reg->var_off.mask & S64_MIN)); ++ /* max signed is min(sign bit) | max(other bits) */ ++ reg->smax_value = min_t(s64, reg->smax_value, ++ reg->var_off.value | (reg->var_off.mask & S64_MAX)); ++ reg->umin_value = max(reg->umin_value, reg->var_off.value); ++ reg->umax_value = min(reg->umax_value, ++ reg->var_off.value | reg->var_off.mask); ++} ++ ++/* Uses signed min/max values to inform unsigned, and vice-versa */ ++static void __reg_deduce_bounds(struct bpf_reg_state *reg) + { ++ /* Learn sign from signed bounds. ++ * If we cannot cross the sign boundary, then signed and unsigned bounds ++ * are the same, so combine. This works even in the negative case, e.g. ++ * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. 
++ */ ++ if (reg->smin_value >= 0 || reg->smax_value < 0) { ++ reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, ++ reg->umin_value); ++ reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, ++ reg->umax_value); ++ return; ++ } ++ /* Learn sign from unsigned bounds. Signed bounds cross the sign ++ * boundary, so we must be careful. ++ */ ++ if ((s64)reg->umax_value >= 0) { ++ /* Positive. We can't learn anything from the smin, but smax ++ * is positive, hence safe. ++ */ ++ reg->smin_value = reg->umin_value; ++ reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, ++ reg->umax_value); ++ } else if ((s64)reg->umin_value < 0) { ++ /* Negative. We can't learn anything from the smax, but smin ++ * is negative, hence safe. ++ */ ++ reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, ++ reg->umin_value); ++ reg->smax_value = reg->umax_value; ++ } ++} ++ ++/* Attempts to improve var_off based on unsigned min/max information */ ++static void __reg_bound_offset(struct bpf_reg_state *reg) ++{ ++ reg->var_off = tnum_intersect(reg->var_off, ++ tnum_range(reg->umin_value, ++ reg->umax_value)); ++} ++ ++/* Reset the min/max bounds of a register */ ++static void __mark_reg_unbounded(struct bpf_reg_state *reg) ++{ ++ reg->smin_value = S64_MIN; ++ reg->smax_value = S64_MAX; ++ reg->umin_value = 0; ++ reg->umax_value = U64_MAX; ++} ++ ++/* Mark a register as having a completely unknown (scalar) value. */ ++static void __mark_reg_unknown(const struct bpf_verifier_env *env, ++ struct bpf_reg_state *reg) ++{ ++ /* ++ * Clear type, id, off, and union(map_ptr, range) and ++ * padding between 'type' and union ++ */ ++ memset(reg, 0, offsetof(struct bpf_reg_state, var_off)); ++ reg->type = SCALAR_VALUE; ++ reg->var_off = tnum_unknown; ++ reg->frameno = 0; ++ reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? 
++ true : false; ++ __mark_reg_unbounded(reg); ++} ++ ++static void mark_reg_unknown(struct bpf_verifier_env *env, ++ struct bpf_reg_state *regs, u32 regno) ++{ ++ if (WARN_ON(regno >= MAX_BPF_REG)) { ++ verbose(env, "mark_reg_unknown(regs, %u)\n", regno); ++ /* Something bad happened, let's kill all regs except FP */ ++ for (regno = 0; regno < BPF_REG_FP; regno++) ++ __mark_reg_not_init(env, regs + regno); ++ return; ++ } ++ __mark_reg_unknown(env, regs + regno); ++} ++ ++static void __mark_reg_not_init(const struct bpf_verifier_env *env, ++ struct bpf_reg_state *reg) ++{ ++ __mark_reg_unknown(env, reg); ++ reg->type = NOT_INIT; ++} ++ ++static void mark_reg_not_init(struct bpf_verifier_env *env, ++ struct bpf_reg_state *regs, u32 regno) ++{ ++ if (WARN_ON(regno >= MAX_BPF_REG)) { ++ verbose(env, "mark_reg_not_init(regs, %u)\n", regno); ++ /* Something bad happened, let's kill all regs except FP */ ++ for (regno = 0; regno < BPF_REG_FP; regno++) ++ __mark_reg_not_init(env, regs + regno); ++ return; ++ } ++ __mark_reg_not_init(env, regs + regno); ++} ++ ++#define DEF_NOT_SUBREG (0) ++static void init_reg_state(struct bpf_verifier_env *env, ++ struct bpf_func_state *state) ++{ ++ struct bpf_reg_state *regs = state->regs; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) { +- regs[i].type = NOT_INIT; +- regs[i].imm = 0; +- regs[i].map_ptr = NULL; ++ mark_reg_not_init(env, regs, i); ++ regs[i].live = REG_LIVE_NONE; ++ regs[i].parent = NULL; ++ regs[i].subreg_def = DEF_NOT_SUBREG; + } + + /* frame pointer */ +- regs[BPF_REG_FP].type = FRAME_PTR; ++ regs[BPF_REG_FP].type = PTR_TO_STACK; ++ mark_reg_known_zero(env, regs, BPF_REG_FP); ++ regs[BPF_REG_FP].frameno = state->frameno; + + /* 1st arg to a function */ + regs[BPF_REG_1].type = PTR_TO_CTX; ++ mark_reg_known_zero(env, regs, BPF_REG_1); + } + +-static void mark_reg_unknown_value(struct reg_state *regs, u32 regno) ++#define BPF_MAIN_FUNC (-1) ++static void init_func_state(struct bpf_verifier_env *env, ++ struct bpf_func_state *state, ++ int callsite, int frameno, int subprogno) + { +- BUG_ON(regno >= MAX_BPF_REG); +- regs[regno].type = UNKNOWN_VALUE; +- regs[regno].imm = 0; +- regs[regno].map_ptr = NULL; ++ state->callsite = callsite; ++ state->frameno = frameno; ++ state->subprogno = subprogno; ++ init_reg_state(env, state); + } + + enum reg_arg_type { +@@ -502,44 +1079,760 @@ enum reg_arg_type { + DST_OP_NO_MARK /* same as above, check only, don't mark */ + }; + +-static int check_reg_arg(struct reg_state *regs, u32 regno, ++static int cmp_subprogs(const void *a, const void *b) ++{ ++ return ((struct bpf_subprog_info *)a)->start - ++ ((struct bpf_subprog_info *)b)->start; ++} ++ ++static int find_subprog(struct bpf_verifier_env *env, int off) ++{ ++ struct bpf_subprog_info *p; ++ ++ p = bsearch(&off, env->subprog_info, env->subprog_cnt, ++ sizeof(env->subprog_info[0]), cmp_subprogs); ++ if (!p) ++ return -ENOENT; ++ return p - env->subprog_info; ++ ++} ++ ++static int add_subprog(struct bpf_verifier_env *env, int off) ++{ ++ int insn_cnt = env->prog->len; ++ int ret; ++ ++ if (off >= insn_cnt || off < 0) { ++ verbose(env, "call to invalid destination\n"); ++ return -EINVAL; ++ } ++ ret = find_subprog(env, off); ++ if (ret >= 0) ++ return 0; ++ if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { ++ verbose(env, "too many subprograms\n"); ++ return -E2BIG; ++ } ++ env->subprog_info[env->subprog_cnt++].start = off; ++ sort(env->subprog_info, env->subprog_cnt, ++ sizeof(env->subprog_info[0]), cmp_subprogs, NULL); ++ return 0; ++} ++ ++static int 
check_subprogs(struct bpf_verifier_env *env) ++{ ++ int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; ++ struct bpf_subprog_info *subprog = env->subprog_info; ++ struct bpf_insn *insn = env->prog->insnsi; ++ int insn_cnt = env->prog->len; ++ ++ /* Add entry function. */ ++ ret = add_subprog(env, 0); ++ if (ret < 0) ++ return ret; ++ ++ /* determine subprog starts. The end is one before the next starts */ ++ for (i = 0; i < insn_cnt; i++) { ++ if (insn[i].code != (BPF_JMP | BPF_CALL)) ++ continue; ++ if (insn[i].src_reg != BPF_PSEUDO_CALL) ++ continue; ++ if (!env->allow_ptr_leaks) { ++ verbose(env, "function calls to other bpf functions are allowed for root only\n"); ++ return -EPERM; ++ } ++ ret = add_subprog(env, i + insn[i].imm + 1); ++ if (ret < 0) ++ return ret; ++ } ++ ++ /* Add a fake 'exit' subprog which could simplify subprog iteration ++ * logic. 'subprog_cnt' should not be increased. ++ */ ++ subprog[env->subprog_cnt].start = insn_cnt; ++ ++ if (env->log.level & BPF_LOG_LEVEL2) ++ for (i = 0; i < env->subprog_cnt; i++) ++ verbose(env, "func#%d @%d\n", i, subprog[i].start); ++ ++ /* now check that all jumps are within the same subprog */ ++ subprog_start = subprog[cur_subprog].start; ++ subprog_end = subprog[cur_subprog + 1].start; ++ for (i = 0; i < insn_cnt; i++) { ++ u8 code = insn[i].code; ++ ++ if (code == (BPF_JMP | BPF_CALL) && ++ insn[i].imm == BPF_FUNC_tail_call && ++ insn[i].src_reg != BPF_PSEUDO_CALL) ++ subprog[cur_subprog].has_tail_call = true; ++ if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ++ goto next; ++ if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) ++ goto next; ++ off = i + insn[i].off + 1; ++ if (off < subprog_start || off >= subprog_end) { ++ verbose(env, "jump out of range from insn %d to %d\n", i, off); ++ return -EINVAL; ++ } ++next: ++ if (i == subprog_end - 1) { ++ /* to avoid fall-through from one subprog into another ++ * the last insn of the subprog should be either exit ++ * or unconditional jump back ++ */ ++ if (code != (BPF_JMP | BPF_EXIT) && ++ code != (BPF_JMP | BPF_JA)) { ++ verbose(env, "last insn is not an exit or jmp\n"); ++ return -EINVAL; ++ } ++ subprog_start = subprog_end; ++ cur_subprog++; ++ if (cur_subprog < env->subprog_cnt) ++ subprog_end = subprog[cur_subprog + 1].start; ++ } ++ } ++ return 0; ++} ++ ++/* Parentage chain of this register (or stack slot) should take care of all ++ * issues like callee-saved registers, stack slot allocation time, etc. ++ */ ++static int mark_reg_read(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *state, ++ struct bpf_reg_state *parent, u8 flag) ++{ ++ bool writes = parent == state->parent; /* Observe write marks */ ++ int cnt = 0; ++ ++ while (parent) { ++ /* if read wasn't screened by an earlier write ... */ ++ if (writes && state->live & REG_LIVE_WRITTEN) ++ break; ++ if (parent->live & REG_LIVE_DONE) { ++ verbose(env, "verifier BUG type %s var_off %lld off %d\n", ++ reg_type_str[parent->type], ++ parent->var_off.value, parent->off); ++ return -EFAULT; ++ } ++ /* The first condition is more likely to be true than the ++ * second, checked it first. ++ */ ++ if ((parent->live & REG_LIVE_READ) == flag || ++ parent->live & REG_LIVE_READ64) ++ /* The parentage chain never changes and ++ * this parent was already marked as LIVE_READ. ++ * There is no need to keep walking the chain again and ++ * keep re-marking all parents as LIVE_READ. ++ * This case happens when the same register is read ++ * multiple times without writes into it in-between. 
++ * Also, if parent has the stronger REG_LIVE_READ64 set, ++ * then no need to set the weak REG_LIVE_READ32. ++ */ ++ break; ++ /* ... then we depend on parent's value */ ++ parent->live |= flag; ++ /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */ ++ if (flag == REG_LIVE_READ64) ++ parent->live &= ~REG_LIVE_READ32; ++ state = parent; ++ parent = state->parent; ++ writes = true; ++ cnt++; ++ } ++ ++ if (env->longest_mark_read_walk < cnt) ++ env->longest_mark_read_walk = cnt; ++ return 0; ++} ++ ++/* This function is supposed to be used by the following 32-bit optimization ++ * code only. It returns TRUE if the source or destination register operates ++ * on 64-bit, otherwise return FALSE. ++ */ ++static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, ++ u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t) ++{ ++ u8 code, class, op; ++ ++ code = insn->code; ++ class = BPF_CLASS(code); ++ op = BPF_OP(code); ++ if (class == BPF_JMP) { ++ /* BPF_EXIT for "main" will reach here. Return TRUE ++ * conservatively. ++ */ ++ if (op == BPF_EXIT) ++ return true; ++ if (op == BPF_CALL) { ++ /* BPF to BPF call will reach here because of marking ++ * caller saved clobber with DST_OP_NO_MARK for which we ++ * don't care the register def because they are anyway ++ * marked as NOT_INIT already. ++ */ ++ if (insn->src_reg == BPF_PSEUDO_CALL) ++ return false; ++ /* Helper call will reach here because of arg type ++ * check, conservatively return TRUE. ++ */ ++ if (t == SRC_OP) ++ return true; ++ ++ return false; ++ } ++ } ++ ++ if (class == BPF_ALU64 || class == BPF_JMP || ++ /* BPF_END always use BPF_ALU class. */ ++ (class == BPF_ALU && op == BPF_END && insn->imm == 64)) ++ return true; ++ ++ if (class == BPF_ALU || class == BPF_JMP32) ++ return false; ++ ++ if (class == BPF_LDX) { ++ if (t != SRC_OP) ++ return BPF_SIZE(code) == BPF_DW; ++ /* LDX source must be ptr. */ ++ return true; ++ } ++ ++ if (class == BPF_STX) { ++ if (reg->type != SCALAR_VALUE) ++ return true; ++ return BPF_SIZE(code) == BPF_DW; ++ } ++ ++ if (class == BPF_LD) { ++ u8 mode = BPF_MODE(code); ++ ++ /* LD_IMM64 */ ++ if (mode == BPF_IMM) ++ return true; ++ ++ /* Both LD_IND and LD_ABS return 32-bit data. */ ++ if (t != SRC_OP) ++ return false; ++ ++ /* Implicit ctx ptr. */ ++ if (regno == BPF_REG_6) ++ return true; ++ ++ /* Explicit source could be any width. */ ++ return true; ++ } ++ ++ if (class == BPF_ST) ++ /* The only source register for BPF_ST is a ptr. */ ++ return true; ++ ++ /* Conservatively return true at default. */ ++ return true; ++} ++ ++/* Return TRUE if INSN doesn't have explicit value define. */ ++static bool insn_no_def(struct bpf_insn *insn) ++{ ++ u8 class = BPF_CLASS(insn->code); ++ ++ return (class == BPF_JMP || class == BPF_JMP32 || ++ class == BPF_STX || class == BPF_ST); ++} ++ ++/* Return TRUE if INSN has defined any 32-bit value explicitly. */ ++static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn) ++{ ++ if (insn_no_def(insn)) ++ return false; ++ ++ return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP); ++} ++ ++static void mark_insn_zext(struct bpf_verifier_env *env, ++ struct bpf_reg_state *reg) ++{ ++ s32 def_idx = reg->subreg_def; ++ ++ if (def_idx == DEF_NOT_SUBREG) ++ return; ++ ++ env->insn_aux_data[def_idx - 1].zext_dst = true; ++ /* The dst will be zero extended, so won't be sub-register anymore. 
*/ ++ reg->subreg_def = DEF_NOT_SUBREG; ++} ++ ++static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, + enum reg_arg_type t) + { ++ struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; ++ struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; ++ struct bpf_reg_state *reg, *regs = state->regs; ++ bool rw64; ++ + if (regno >= MAX_BPF_REG) { +- verbose("R%d is invalid\n", regno); ++ verbose(env, "R%d is invalid\n", regno); + return -EINVAL; + } + ++ reg = ®s[regno]; ++ rw64 = is_reg64(env, insn, regno, reg, t); + if (t == SRC_OP) { + /* check whether register used as source operand can be read */ +- if (regs[regno].type == NOT_INIT) { +- verbose("R%d !read_ok\n", regno); ++ if (reg->type == NOT_INIT) { ++ verbose(env, "R%d !read_ok\n", regno); + return -EACCES; + } ++ /* We don't need to worry about FP liveness because it's read-only */ ++ if (regno == BPF_REG_FP) ++ return 0; ++ ++ if (rw64) ++ mark_insn_zext(env, reg); ++ ++ return mark_reg_read(env, reg, reg->parent, ++ rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32); + } else { + /* check whether register used as dest operand can be written to */ + if (regno == BPF_REG_FP) { +- verbose("frame pointer is read only\n"); ++ verbose(env, "frame pointer is read only\n"); + return -EACCES; + } ++ reg->live |= REG_LIVE_WRITTEN; ++ reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1; + if (t == DST_OP) +- mark_reg_unknown_value(regs, regno); ++ mark_reg_unknown(env, regs, regno); + } + return 0; + } + +-static int bpf_size_to_bytes(int bpf_size) ++/* for any branch, call, exit record the history of jmps in the given state */ ++static int push_jmp_history(struct bpf_verifier_env *env, ++ struct bpf_verifier_state *cur) + { +- if (bpf_size == BPF_W) +- return 4; +- else if (bpf_size == BPF_H) +- return 2; +- else if (bpf_size == BPF_B) +- return 1; +- else if (bpf_size == BPF_DW) +- return 8; +- else +- return -EINVAL; ++ u32 cnt = cur->jmp_history_cnt; ++ struct bpf_idx_pair *p; ++ ++ cnt++; ++ p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); ++ if (!p) ++ return -ENOMEM; ++ p[cnt - 1].idx = env->insn_idx; ++ p[cnt - 1].prev_idx = env->prev_insn_idx; ++ cur->jmp_history = p; ++ cur->jmp_history_cnt = cnt; ++ return 0; ++} ++ ++/* Backtrack one insn at a time. If idx is not at the top of recorded ++ * history then previous instruction came from straight line execution. ++ */ ++static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, ++ u32 *history) ++{ ++ u32 cnt = *history; ++ ++ if (cnt && st->jmp_history[cnt - 1].idx == i) { ++ i = st->jmp_history[cnt - 1].prev_idx; ++ (*history)--; ++ } else { ++ i--; ++ } ++ return i; ++} ++ ++/* For given verifier state backtrack_insn() is called from the last insn to ++ * the first insn. Its purpose is to compute a bitmask of registers and ++ * stack slots that needs precision in the parent verifier state. 
++ */ ++static int backtrack_insn(struct bpf_verifier_env *env, int idx, ++ u32 *reg_mask, u64 *stack_mask) ++{ ++ const struct bpf_insn_cbs cbs = { ++ .cb_print = verbose, ++ .private_data = env, ++ }; ++ struct bpf_insn *insn = env->prog->insnsi + idx; ++ u8 class = BPF_CLASS(insn->code); ++ u8 opcode = BPF_OP(insn->code); ++ u8 mode = BPF_MODE(insn->code); ++ u32 dreg = 1u << insn->dst_reg; ++ u32 sreg = 1u << insn->src_reg; ++ u32 spi; ++ ++ if (insn->code == 0) ++ return 0; ++ if (env->log.level & BPF_LOG_LEVEL) { ++ verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); ++ verbose(env, "%d: ", idx); ++ print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); ++ } ++ ++ if (class == BPF_ALU || class == BPF_ALU64) { ++ if (!(*reg_mask & dreg)) ++ return 0; ++ if (opcode == BPF_MOV) { ++ if (BPF_SRC(insn->code) == BPF_X) { ++ /* dreg = sreg ++ * dreg needs precision after this insn ++ * sreg needs precision before this insn ++ */ ++ *reg_mask &= ~dreg; ++ *reg_mask |= sreg; ++ } else { ++ /* dreg = K ++ * dreg needs precision after this insn. ++ * Corresponding register is already marked ++ * as precise=true in this verifier state. ++ * No further markings in parent are necessary ++ */ ++ *reg_mask &= ~dreg; ++ } ++ } else { ++ if (BPF_SRC(insn->code) == BPF_X) { ++ /* dreg += sreg ++ * both dreg and sreg need precision ++ * before this insn ++ */ ++ *reg_mask |= sreg; ++ } /* else dreg += K ++ * dreg still needs precision before this insn ++ */ ++ } ++ } else if (class == BPF_LDX) { ++ if (!(*reg_mask & dreg)) ++ return 0; ++ *reg_mask &= ~dreg; ++ ++ /* scalars can only be spilled into stack w/o losing precision. ++ * Load from any other memory can be zero extended. ++ * The desire to keep that precision is already indicated ++ * by 'precise' mark in corresponding register of this state. ++ * No further tracking necessary. ++ */ ++ if (insn->src_reg != BPF_REG_FP) ++ return 0; ++ if (BPF_SIZE(insn->code) != BPF_DW) ++ return 0; ++ ++ /* dreg = *(u64 *)[fp - off] was a fill from the stack. ++ * that [fp - off] slot contains scalar that needs to be ++ * tracked with precision ++ */ ++ spi = (-insn->off - 1) / BPF_REG_SIZE; ++ if (spi >= 64) { ++ verbose(env, "BUG spi %d\n", spi); ++ WARN_ONCE(1, "verifier backtracking bug"); ++ return -EFAULT; ++ } ++ *stack_mask |= 1ull << spi; ++ } else if (class == BPF_STX || class == BPF_ST) { ++ if (*reg_mask & dreg) ++ /* stx & st shouldn't be using _scalar_ dst_reg ++ * to access memory. It means backtracking ++ * encountered a case of pointer subtraction. ++ */ ++ return -ENOTSUPP; ++ /* scalars can only be spilled into stack */ ++ if (insn->dst_reg != BPF_REG_FP) ++ return 0; ++ if (BPF_SIZE(insn->code) != BPF_DW) ++ return 0; ++ spi = (-insn->off - 1) / BPF_REG_SIZE; ++ if (spi >= 64) { ++ verbose(env, "BUG spi %d\n", spi); ++ WARN_ONCE(1, "verifier backtracking bug"); ++ return -EFAULT; ++ } ++ if (!(*stack_mask & (1ull << spi))) ++ return 0; ++ *stack_mask &= ~(1ull << spi); ++ if (class == BPF_STX) ++ *reg_mask |= sreg; ++ } else if (class == BPF_JMP || class == BPF_JMP32) { ++ if (opcode == BPF_CALL) { ++ if (insn->src_reg == BPF_PSEUDO_CALL) ++ return -ENOTSUPP; ++ /* regular helper call sets R0 */ ++ *reg_mask &= ~1; ++ if (*reg_mask & 0x3f) { ++ /* if backtracing was looking for registers R1-R5 ++ * they should have been found already. 
++ */ ++ verbose(env, "BUG regs %x\n", *reg_mask); ++ WARN_ONCE(1, "verifier backtracking bug"); ++ return -EFAULT; ++ } ++ } else if (opcode == BPF_EXIT) { ++ return -ENOTSUPP; ++ } ++ } else if (class == BPF_LD) { ++ if (!(*reg_mask & dreg)) ++ return 0; ++ *reg_mask &= ~dreg; ++ /* It's ld_imm64 or ld_abs or ld_ind. ++ * For ld_imm64 no further tracking of precision ++ * into parent is necessary ++ */ ++ if (mode == BPF_IND || mode == BPF_ABS) ++ /* to be analyzed */ ++ return -ENOTSUPP; ++ } ++ return 0; ++} ++ ++/* the scalar precision tracking algorithm: ++ * . at the start all registers have precise=false. ++ * . scalar ranges are tracked as normal through alu and jmp insns. ++ * . once precise value of the scalar register is used in: ++ * . ptr + scalar alu ++ * . if (scalar cond K|scalar) ++ * . helper_call(.., scalar, ...) where ARG_CONST is expected ++ * backtrack through the verifier states and mark all registers and ++ * stack slots with spilled constants that these scalar regisers ++ * should be precise. ++ * . during state pruning two registers (or spilled stack slots) ++ * are equivalent if both are not precise. ++ * ++ * Note the verifier cannot simply walk register parentage chain, ++ * since many different registers and stack slots could have been ++ * used to compute single precise scalar. ++ * ++ * The approach of starting with precise=true for all registers and then ++ * backtrack to mark a register as not precise when the verifier detects ++ * that program doesn't care about specific value (e.g., when helper ++ * takes register as ARG_ANYTHING parameter) is not safe. ++ * ++ * It's ok to walk single parentage chain of the verifier states. ++ * It's possible that this backtracking will go all the way till 1st insn. ++ * All other branches will be explored for needing precision later. ++ * ++ * The backtracking needs to deal with cases like: ++ * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0) ++ * r9 -= r8 ++ * r5 = r9 ++ * if r5 > 0x79f goto pc+7 ++ * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff)) ++ * r5 += 1 ++ * ... ++ * call bpf_perf_event_output#25 ++ * where .arg5_type = ARG_CONST_SIZE_OR_ZERO ++ * ++ * and this case: ++ * r6 = 1 ++ * call foo // uses callee's r6 inside to compute r0 ++ * r0 += r6 ++ * if r0 == 0 goto ++ * ++ * to track above reg_mask/stack_mask needs to be independent for each frame. ++ * ++ * Also if parent's curframe > frame where backtracking started, ++ * the verifier need to mark registers in both frames, otherwise callees ++ * may incorrectly prune callers. This is similar to ++ * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") ++ * ++ * For now backtracking falls back into conservative marking. ++ */ ++static void mark_all_scalars_precise(struct bpf_verifier_env *env, ++ struct bpf_verifier_state *st) ++{ ++ struct bpf_func_state *func; ++ struct bpf_reg_state *reg; ++ int i, j; ++ ++ /* big hammer: mark all scalars precise in this path. ++ * pop_stack may still get !precise scalars. 
++ */ ++ for (; st; st = st->parent) ++ for (i = 0; i <= st->curframe; i++) { ++ func = st->frame[i]; ++ for (j = 0; j < BPF_REG_FP; j++) { ++ reg = &func->regs[j]; ++ if (reg->type != SCALAR_VALUE) ++ continue; ++ reg->precise = true; ++ } ++ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { ++ if (func->stack[j].slot_type[0] != STACK_SPILL) ++ continue; ++ reg = &func->stack[j].spilled_ptr; ++ if (reg->type != SCALAR_VALUE) ++ continue; ++ reg->precise = true; ++ } ++ } ++} ++ ++static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, ++ int spi) ++{ ++ struct bpf_verifier_state *st = env->cur_state; ++ int first_idx = st->first_insn_idx; ++ int last_idx = env->insn_idx; ++ struct bpf_func_state *func; ++ struct bpf_reg_state *reg; ++ u32 reg_mask = regno >= 0 ? 1u << regno : 0; ++ u64 stack_mask = spi >= 0 ? 1ull << spi : 0; ++ bool skip_first = true; ++ bool new_marks = false; ++ int i, err; ++ ++ if (!env->allow_ptr_leaks) ++ /* backtracking is root only for now */ ++ return 0; ++ ++ func = st->frame[st->curframe]; ++ if (regno >= 0) { ++ reg = &func->regs[regno]; ++ if (reg->type != SCALAR_VALUE) { ++ WARN_ONCE(1, "backtracing misuse"); ++ return -EFAULT; ++ } ++ if (!reg->precise) ++ new_marks = true; ++ else ++ reg_mask = 0; ++ reg->precise = true; ++ } ++ ++ while (spi >= 0) { ++ if (func->stack[spi].slot_type[0] != STACK_SPILL) { ++ stack_mask = 0; ++ break; ++ } ++ reg = &func->stack[spi].spilled_ptr; ++ if (reg->type != SCALAR_VALUE) { ++ stack_mask = 0; ++ break; ++ } ++ if (!reg->precise) ++ new_marks = true; ++ else ++ stack_mask = 0; ++ reg->precise = true; ++ break; ++ } ++ ++ if (!new_marks) ++ return 0; ++ if (!reg_mask && !stack_mask) ++ return 0; ++ for (;;) { ++ DECLARE_BITMAP(mask, 64); ++ u32 history = st->jmp_history_cnt; ++ ++ if (env->log.level & BPF_LOG_LEVEL) ++ verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); ++ for (i = last_idx;;) { ++ if (skip_first) { ++ err = 0; ++ skip_first = false; ++ } else { ++ err = backtrack_insn(env, i, ®_mask, &stack_mask); ++ } ++ if (err == -ENOTSUPP) { ++ mark_all_scalars_precise(env, st); ++ return 0; ++ } else if (err) { ++ return err; ++ } ++ if (!reg_mask && !stack_mask) ++ /* Found assignment(s) into tracked register in this state. ++ * Since this state is already marked, just return. ++ * Nothing to be tracked further in the parent state. ++ */ ++ return 0; ++ if (i == first_idx) ++ break; ++ i = get_prev_insn_idx(st, i, &history); ++ if (i >= env->prog->len) { ++ /* This can happen if backtracking reached insn 0 ++ * and there are still reg_mask or stack_mask ++ * to backtrack. ++ * It means the backtracking missed the spot where ++ * particular register was initialized with a constant. ++ */ ++ verbose(env, "BUG backtracking idx %d\n", i); ++ WARN_ONCE(1, "verifier backtracking bug"); ++ return -EFAULT; ++ } ++ } ++ st = st->parent; ++ if (!st) ++ break; ++ ++ new_marks = false; ++ func = st->frame[st->curframe]; ++ bitmap_from_u64(mask, reg_mask); ++ for_each_set_bit(i, mask, 32) { ++ reg = &func->regs[i]; ++ if (reg->type != SCALAR_VALUE) { ++ reg_mask &= ~(1u << i); ++ continue; ++ } ++ if (!reg->precise) ++ new_marks = true; ++ reg->precise = true; ++ } ++ ++ bitmap_from_u64(mask, stack_mask); ++ for_each_set_bit(i, mask, 64) { ++ if (i >= func->allocated_stack / BPF_REG_SIZE) { ++ /* the sequence of instructions: ++ * 2: (bf) r3 = r10 ++ * 3: (7b) *(u64 *)(r3 -8) = r0 ++ * 4: (79) r4 = *(u64 *)(r10 -8) ++ * doesn't contain jmps. It's backtracked ++ * as a single block. 
++ * During backtracking insn 3 is not recognized as ++ * stack access, so at the end of backtracking ++ * stack slot fp-8 is still marked in stack_mask. ++ * However the parent state may not have accessed ++ * fp-8 and it's "unallocated" stack space. ++ * In such case fallback to conservative. ++ */ ++ mark_all_scalars_precise(env, st); ++ return 0; ++ } ++ ++ if (func->stack[i].slot_type[0] != STACK_SPILL) { ++ stack_mask &= ~(1ull << i); ++ continue; ++ } ++ reg = &func->stack[i].spilled_ptr; ++ if (reg->type != SCALAR_VALUE) { ++ stack_mask &= ~(1ull << i); ++ continue; ++ } ++ if (!reg->precise) ++ new_marks = true; ++ reg->precise = true; ++ } ++ if (env->log.level & BPF_LOG_LEVEL) { ++ print_verifier_state(env, func); ++ verbose(env, "parent %s regs=%x stack=%llx marks\n", ++ new_marks ? "didn't have" : "already had", ++ reg_mask, stack_mask); ++ } ++ ++ if (!reg_mask && !stack_mask) ++ break; ++ if (!new_marks) ++ break; ++ ++ last_idx = st->last_insn_idx; ++ first_idx = st->first_insn_idx; ++ } ++ return 0; ++} ++ ++static int mark_chain_precision(struct bpf_verifier_env *env, int regno) ++{ ++ return __mark_chain_precision(env, regno, -1); ++} ++ ++static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) ++{ ++ return __mark_chain_precision(env, -1, spi); + } + + static bool is_spillable_regtype(enum bpf_reg_type type) +@@ -549,129 +1842,932 @@ static bool is_spillable_regtype(enum bp + case PTR_TO_MAP_VALUE_OR_NULL: + case PTR_TO_STACK: + case PTR_TO_CTX: +- case FRAME_PTR: ++ case PTR_TO_PACKET: ++ case PTR_TO_PACKET_META: ++ case PTR_TO_PACKET_END: ++ case PTR_TO_FLOW_KEYS: + case CONST_PTR_TO_MAP: ++ case PTR_TO_SOCKET: ++ case PTR_TO_SOCKET_OR_NULL: ++ case PTR_TO_SOCK_COMMON: ++ case PTR_TO_SOCK_COMMON_OR_NULL: ++ case PTR_TO_TCP_SOCK: ++ case PTR_TO_TCP_SOCK_OR_NULL: ++ case PTR_TO_XDP_SOCK: + return true; + default: + return false; + } + } + ++/* Does this register contain a constant zero? 
*/ ++static bool register_is_null(struct bpf_reg_state *reg) ++{ ++ return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); ++} ++ ++static bool register_is_const(struct bpf_reg_state *reg) ++{ ++ return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); ++} ++ ++static bool __is_pointer_value(bool allow_ptr_leaks, ++ const struct bpf_reg_state *reg) ++{ ++ if (allow_ptr_leaks) ++ return false; ++ ++ return reg->type != SCALAR_VALUE; ++} ++ ++static void save_register_state(struct bpf_func_state *state, ++ int spi, struct bpf_reg_state *reg) ++{ ++ int i; ++ ++ state->stack[spi].spilled_ptr = *reg; ++ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; ++ ++ for (i = 0; i < BPF_REG_SIZE; i++) ++ state->stack[spi].slot_type[i] = STACK_SPILL; ++} ++ + /* check_stack_read/write functions track spill/fill of registers, + * stack boundary and alignment are checked in check_mem_access() + */ +-static int check_stack_write(struct verifier_state *state, int off, int size, +- int value_regno) ++static int check_stack_write(struct bpf_verifier_env *env, ++ struct bpf_func_state *state, /* func where register points to */ ++ int off, int size, int value_regno, int insn_idx) + { +- int i; ++ struct bpf_func_state *cur; /* state of the current function */ ++ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; ++ u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; ++ struct bpf_reg_state *reg = NULL; ++ ++ err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), ++ state->acquired_refs, true); ++ if (err) ++ return err; + /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, + * so it's aligned access and [off, off + size) are within stack limits + */ ++ if (!env->allow_ptr_leaks && ++ state->stack[spi].slot_type[0] == STACK_SPILL && ++ size != BPF_REG_SIZE) { ++ verbose(env, "attempt to corrupt spilled pointer on stack\n"); ++ return -EACCES; ++ } + +- if (value_regno >= 0 && +- is_spillable_regtype(state->regs[value_regno].type)) { +- ++ cur = env->cur_state->frame[env->cur_state->curframe]; ++ if (value_regno >= 0) ++ reg = &cur->regs[value_regno]; ++ ++ if (reg && size == BPF_REG_SIZE && register_is_const(reg) && ++ !register_is_null(reg) && env->allow_ptr_leaks) { ++ if (dst_reg != BPF_REG_FP) { ++ /* The backtracking logic can only recognize explicit ++ * stack slot address like [fp - 8]. Other spill of ++ * scalar via different register has to be conervative. ++ * Backtrack from here and mark all registers as precise ++ * that contributed into 'reg' being a constant. 
++ */ ++ err = mark_chain_precision(env, value_regno); ++ if (err) ++ return err; ++ } ++ save_register_state(state, spi, reg); ++ } else if (reg && is_spillable_regtype(reg->type)) { + /* register containing pointer is being spilled into stack */ + if (size != BPF_REG_SIZE) { +- verbose("invalid size of register spill\n"); ++ verbose_linfo(env, insn_idx, "; "); ++ verbose(env, "invalid size of register spill\n"); + return -EACCES; + } + +- /* save register state */ +- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = +- state->regs[value_regno]; ++ if (state != cur && reg->type == PTR_TO_STACK) { ++ verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); ++ return -EINVAL; ++ } + +- for (i = 0; i < BPF_REG_SIZE; i++) +- state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; ++ if (!env->allow_ptr_leaks) { ++ bool sanitize = false; ++ ++ if (state->stack[spi].slot_type[0] == STACK_SPILL && ++ register_is_const(&state->stack[spi].spilled_ptr)) ++ sanitize = true; ++ for (i = 0; i < BPF_REG_SIZE; i++) ++ if (state->stack[spi].slot_type[i] == STACK_MISC) { ++ sanitize = true; ++ break; ++ } ++ if (sanitize) { ++ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; ++ int soff = (-spi - 1) * BPF_REG_SIZE; ++ ++ /* detected reuse of integer stack slot with a pointer ++ * which means either llvm is reusing stack slot or ++ * an attacker is trying to exploit CVE-2018-3639 ++ * (speculative store bypass) ++ * Have to sanitize that slot with preemptive ++ * store of zero. ++ */ ++ if (*poff && *poff != soff) { ++ /* disallow programs where single insn stores ++ * into two different stack slots, since verifier ++ * cannot sanitize them ++ */ ++ verbose(env, ++ "insn %d cannot access two stack slots fp%d and fp%d", ++ insn_idx, *poff, soff); ++ return -EINVAL; ++ } ++ *poff = soff; ++ } ++ } ++ save_register_state(state, spi, reg); + } else { +- /* regular write of data into stack */ +- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = +- (struct reg_state) {}; ++ u8 type = STACK_MISC; ++ ++ /* regular write of data into stack destroys any spilled ptr */ ++ state->stack[spi].spilled_ptr.type = NOT_INIT; ++ /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ ++ if (state->stack[spi].slot_type[0] == STACK_SPILL) ++ for (i = 0; i < BPF_REG_SIZE; i++) ++ state->stack[spi].slot_type[i] = STACK_MISC; ++ ++ /* only mark the slot as written if all 8 bytes were written ++ * otherwise read propagation may incorrectly stop too soon ++ * when stack slots are partially written. ++ * This heuristic means that read propagation will be ++ * conservative, since it will add reg_live_read marks ++ * to stack slots all the way to first state when programs ++ * writes+reads less than 8 bytes ++ */ ++ if (size == BPF_REG_SIZE) ++ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; ++ ++ /* when we zero initialize stack slots mark them as such */ ++ if (reg && register_is_null(reg)) { ++ /* backtracking doesn't work for STACK_ZERO yet. */ ++ err = mark_chain_precision(env, value_regno); ++ if (err) ++ return err; ++ type = STACK_ZERO; ++ } + ++ /* Mark slots affected by this stack write. 
*/ + for (i = 0; i < size; i++) +- state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; ++ state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = ++ type; + } + return 0; + } + +-static int check_stack_read(struct verifier_state *state, int off, int size, +- int value_regno) ++static int check_stack_read(struct bpf_verifier_env *env, ++ struct bpf_func_state *reg_state /* func where register points to */, ++ int off, int size, int value_regno) + { +- u8 *slot_type; +- int i; +- +- slot_type = &state->stack_slot_type[MAX_BPF_STACK + off]; ++ struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; ++ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; ++ struct bpf_reg_state *reg; ++ u8 *stype; ++ ++ if (reg_state->allocated_stack <= slot) { ++ verbose(env, "invalid read from stack off %d+0 size %d\n", ++ off, size); ++ return -EACCES; ++ } ++ stype = reg_state->stack[spi].slot_type; ++ reg = ®_state->stack[spi].spilled_ptr; + +- if (slot_type[0] == STACK_SPILL) { ++ if (stype[0] == STACK_SPILL) { + if (size != BPF_REG_SIZE) { +- verbose("invalid size of register spill\n"); +- return -EACCES; ++ if (reg->type != SCALAR_VALUE) { ++ verbose_linfo(env, env->insn_idx, "; "); ++ verbose(env, "invalid size of register fill\n"); ++ return -EACCES; ++ } ++ if (value_regno >= 0) { ++ mark_reg_unknown(env, state->regs, value_regno); ++ state->regs[value_regno].live |= REG_LIVE_WRITTEN; ++ } ++ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); ++ return 0; + } + for (i = 1; i < BPF_REG_SIZE; i++) { +- if (slot_type[i] != STACK_SPILL) { +- verbose("corrupted spill memory\n"); ++ if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { ++ verbose(env, "corrupted spill memory\n"); + return -EACCES; + } + } + +- if (value_regno >= 0) ++ if (value_regno >= 0) { + /* restore register state from stack */ +- state->regs[value_regno] = +- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE]; +- return 0; ++ state->regs[value_regno] = *reg; ++ /* mark reg as written since spilled pointer state likely ++ * has its liveness marks cleared by is_state_visited() ++ * which resets stack/reg liveness for state transitions ++ */ ++ state->regs[value_regno].live |= REG_LIVE_WRITTEN; ++ } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) { ++ /* If value_regno==-1, the caller is asking us whether ++ * it is acceptable to use this value as a SCALAR_VALUE ++ * (e.g. for XADD). ++ * We must not allow unprivileged callers to do that ++ * with spilled pointers. ++ */ ++ verbose(env, "leaking pointer from stack off %d\n", ++ off); ++ return -EACCES; ++ } ++ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); + } else { ++ int zeros = 0; ++ + for (i = 0; i < size; i++) { +- if (slot_type[i] != STACK_MISC) { +- verbose("invalid read from stack off %d+%d size %d\n", +- off, i, size); +- return -EACCES; ++ if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC) ++ continue; ++ if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) { ++ zeros++; ++ continue; + } ++ verbose(env, "invalid read from stack off %d+%d size %d\n", ++ off, i, size); ++ return -EACCES; ++ } ++ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); ++ if (value_regno >= 0) { ++ if (zeros == size) { ++ /* any size read into register is zero extended, ++ * so the whole register == const_zero ++ */ ++ __mark_reg_const_zero(&state->regs[value_regno]); ++ /* backtracking doesn't support STACK_ZERO yet, ++ * so mark it precise here, so that later ++ * backtracking can stop here. 
++ * Backtracking may not need this if this register ++ * doesn't participate in pointer adjustment. ++ * Forward propagation of precise flag is not ++ * necessary either. This mark is only to stop ++ * backtracking. Any register that contributed ++ * to const 0 was marked precise before spill. ++ */ ++ state->regs[value_regno].precise = true; ++ } else { ++ /* have read misc data from the stack */ ++ mark_reg_unknown(env, state->regs, value_regno); ++ } ++ state->regs[value_regno].live |= REG_LIVE_WRITTEN; + } +- if (value_regno >= 0) +- /* have read misc data from the stack */ +- mark_reg_unknown_value(state->regs, value_regno); +- return 0; + } ++ return 0; ++} ++ ++static int check_stack_access(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg, ++ int off, int size) ++{ ++ /* Stack accesses must be at a fixed offset, so that we ++ * can determine what type of data were returned. See ++ * check_stack_read(). ++ */ ++ if (!tnum_is_const(reg->var_off)) { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "variable stack access var_off=%s off=%d size=%d\n", ++ tn_buf, off, size); ++ return -EACCES; ++ } ++ ++ if (off >= 0 || off < -MAX_BPF_STACK) { ++ verbose(env, "invalid stack off=%d size=%d\n", off, size); ++ return -EACCES; ++ } ++ ++ return 0; ++} ++ ++static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, ++ int off, int size, enum bpf_access_type type) ++{ ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_map *map = regs[regno].map_ptr; ++ u32 cap = bpf_map_flags_to_cap(map); ++ ++ if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { ++ verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", ++ map->value_size, off, size); ++ return -EACCES; ++ } ++ ++ if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { ++ verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", ++ map->value_size, off, size); ++ return -EACCES; ++ } ++ ++ return 0; + } + + /* check read/write into map element returned by bpf_map_lookup_elem() */ +-static int check_map_access(struct verifier_env *env, u32 regno, int off, +- int size) ++static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, ++ int size, bool zero_size_allowed) + { +- struct bpf_map *map = env->cur_state.regs[regno].map_ptr; ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_map *map = regs[regno].map_ptr; + +- if (off < 0 || off + size > map->value_size) { +- verbose("invalid access to map value, value_size=%d off=%d size=%d\n", ++ if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || ++ off + size > map->value_size) { ++ verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", + map->value_size, off, size); + return -EACCES; + } + return 0; + } + +-/* check access to 'struct bpf_context' fields */ +-static int check_ctx_access(struct verifier_env *env, int off, int size, +- enum bpf_access_type t) ++/* check read/write into a map element with possible variable offset */ ++static int check_map_access(struct bpf_verifier_env *env, u32 regno, ++ int off, int size, bool zero_size_allowed) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; ++ struct bpf_reg_state *reg = &state->regs[regno]; ++ int err; ++ ++ /* We may have adjusted the register to this map value, so we ++ * need to try adding each of min_value and max_value to off ++ * to make sure our theoretical access will be safe. 
++ */ ++ if (env->log.level & BPF_LOG_LEVEL) ++ print_verifier_state(env, state); ++ ++ /* The minimum value is only important with signed ++ * comparisons where we can't assume the floor of a ++ * value is 0. If we are using signed variables for our ++ * index'es we need to make sure that whatever we use ++ * will have a set floor within our range. ++ */ ++ if (reg->smin_value < 0 && ++ (reg->smin_value == S64_MIN || ++ (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || ++ reg->smin_value + off < 0)) { ++ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", ++ regno); ++ return -EACCES; ++ } ++ err = __check_map_access(env, regno, reg->smin_value + off, size, ++ zero_size_allowed); ++ if (err) { ++ verbose(env, "R%d min value is outside of the array range\n", ++ regno); ++ return err; ++ } ++ ++ /* If we haven't set a max value then we need to bail since we can't be ++ * sure we won't do bad things. ++ * If reg->umax_value + off could overflow, treat that as unbounded too. ++ */ ++ if (reg->umax_value >= BPF_MAX_VAR_OFF) { ++ verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n", ++ regno); ++ return -EACCES; ++ } ++ err = __check_map_access(env, regno, reg->umax_value + off, size, ++ zero_size_allowed); ++ if (err) ++ verbose(env, "R%d max value is outside of the array range\n", ++ regno); ++ ++ if (map_value_has_spin_lock(reg->map_ptr)) { ++ u32 lock = reg->map_ptr->spin_lock_off; ++ ++ /* if any part of struct bpf_spin_lock can be touched by ++ * load/store reject this program. ++ * To check that [x1, x2) overlaps with [y1, y2) ++ * it is sufficient to check x1 < y2 && y1 < x2. ++ */ ++ if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && ++ lock < reg->umax_value + off + size) { ++ verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n"); ++ return -EACCES; ++ } ++ } ++ return err; ++} ++ ++#define MAX_PACKET_OFF 0xffff ++ ++static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, ++ const struct bpf_call_arg_meta *meta, ++ enum bpf_access_type t) ++{ ++ switch (env->prog->type) { ++ /* Program types only with direct read access go here! */ ++ case BPF_PROG_TYPE_LWT_IN: ++ case BPF_PROG_TYPE_LWT_OUT: ++ case BPF_PROG_TYPE_LWT_SEG6LOCAL: ++ case BPF_PROG_TYPE_SK_REUSEPORT: ++ case BPF_PROG_TYPE_FLOW_DISSECTOR: ++ case BPF_PROG_TYPE_CGROUP_SKB: ++ if (t == BPF_WRITE) ++ return false; ++ /* fallthrough */ ++ ++ /* Program types with direct read + write access go here! 
*/ ++ case BPF_PROG_TYPE_SCHED_CLS: ++ case BPF_PROG_TYPE_SCHED_ACT: ++ case BPF_PROG_TYPE_XDP: ++ case BPF_PROG_TYPE_LWT_XMIT: ++ case BPF_PROG_TYPE_SK_SKB: ++ case BPF_PROG_TYPE_SK_MSG: ++ if (meta) ++ return meta->pkt_access; ++ ++ env->seen_direct_write = true; ++ return true; ++ ++ case BPF_PROG_TYPE_CGROUP_SOCKOPT: ++ if (t == BPF_WRITE) ++ env->seen_direct_write = true; ++ ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, ++ int off, int size, bool zero_size_allowed) ++{ ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_reg_state *reg = ®s[regno]; ++ ++ if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || ++ (u64)off + size > reg->range) { ++ verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", ++ off, size, regno, reg->id, reg->off, reg->range); ++ return -EACCES; ++ } ++ return 0; ++} ++ ++static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, ++ int size, bool zero_size_allowed) ++{ ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_reg_state *reg = ®s[regno]; ++ int err; ++ ++ /* We may have added a variable offset to the packet pointer; but any ++ * reg->range we have comes after that. We are only checking the fixed ++ * offset. ++ */ ++ ++ /* We don't allow negative numbers, because we aren't tracking enough ++ * detail to prove they're safe. ++ */ ++ if (reg->smin_value < 0) { ++ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", ++ regno); ++ return -EACCES; ++ } ++ err = __check_packet_access(env, regno, off, size, zero_size_allowed); ++ if (err) { ++ verbose(env, "R%d offset is outside of the packet\n", regno); ++ return err; ++ } ++ ++ /* __check_packet_access has made sure "off + size - 1" is within u16. ++ * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff, ++ * otherwise find_good_pkt_pointers would have refused to set range info ++ * that __check_packet_access would have rejected this pkt access. ++ * Therefore, "off + reg->umax_value + size - 1" won't overflow u32. ++ */ ++ env->prog->aux->max_pkt_offset = ++ max_t(u32, env->prog->aux->max_pkt_offset, ++ off + reg->umax_value + size - 1); ++ ++ return err; ++} ++ ++/* check access to 'struct bpf_context' fields. Supports fixed offsets only */ ++static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, ++ enum bpf_access_type t, enum bpf_reg_type *reg_type) + { +- if (env->prog->aux->ops->is_valid_access && +- env->prog->aux->ops->is_valid_access(off, size, t)) ++ struct bpf_insn_access_aux info = { ++ .reg_type = *reg_type, ++ }; ++ ++ if (env->ops->is_valid_access && ++ env->ops->is_valid_access(off, size, t, env->prog, &info)) { ++ /* A non zero info.ctx_field_size indicates that this field is a ++ * candidate for later verifier transformation to load the whole ++ * field and then apply a mask when accessed with a narrower ++ * access than actual ctx access size. A zero info.ctx_field_size ++ * will only allow for whole field access and rejects any other ++ * type of narrower access. 
++ */ ++ *reg_type = info.reg_type; ++ ++ env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; ++ /* remember the offset of last byte accessed in ctx */ ++ if (env->prog->aux->max_ctx_offset < off + size) ++ env->prog->aux->max_ctx_offset = off + size; + return 0; ++ } + +- verbose("invalid bpf_context access off=%d size=%d\n", off, size); ++ verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size); + return -EACCES; + } + +-static bool is_pointer_value(struct verifier_env *env, int regno) ++static int check_flow_keys_access(struct bpf_verifier_env *env, int off, ++ int size) + { +- if (env->allow_ptr_leaks) +- return false; ++ if (size < 0 || off < 0 || ++ (u64)off + size > sizeof(struct bpf_flow_keys)) { ++ verbose(env, "invalid access to flow keys off=%d size=%d\n", ++ off, size); ++ return -EACCES; ++ } ++ return 0; ++} + +- switch (env->cur_state.regs[regno].type) { +- case UNKNOWN_VALUE: +- case CONST_IMM: +- return false; ++static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, ++ u32 regno, int off, int size, ++ enum bpf_access_type t) ++{ ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_reg_state *reg = ®s[regno]; ++ struct bpf_insn_access_aux info = {}; ++ bool valid; ++ ++ if (reg->smin_value < 0) { ++ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", ++ regno); ++ return -EACCES; ++ } ++ ++ switch (reg->type) { ++ case PTR_TO_SOCK_COMMON: ++ valid = bpf_sock_common_is_valid_access(off, size, t, &info); ++ break; + default: +- return true; ++ valid = false; ++ } ++ ++ ++ if (valid) { ++ env->insn_aux_data[insn_idx].ctx_field_size = ++ info.ctx_field_size; ++ return 0; ++ } ++ ++ verbose(env, "R%d invalid %s access off=%d size=%d\n", ++ regno, reg_type_str[reg->type], off, size); ++ ++ return -EACCES; ++} ++ ++static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) ++{ ++ return cur_regs(env) + regno; ++} ++ ++static bool is_pointer_value(struct bpf_verifier_env *env, int regno) ++{ ++ return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); ++} ++ ++static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) ++{ ++ const struct bpf_reg_state *reg = reg_state(env, regno); ++ ++ return reg->type == PTR_TO_CTX; ++} ++ ++static bool is_sk_reg(struct bpf_verifier_env *env, int regno) ++{ ++ const struct bpf_reg_state *reg = reg_state(env, regno); ++ ++ return type_is_sk_pointer(reg->type); ++} ++ ++static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) ++{ ++ const struct bpf_reg_state *reg = reg_state(env, regno); ++ ++ return type_is_pkt_pointer(reg->type); ++} ++ ++static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) ++{ ++ const struct bpf_reg_state *reg = reg_state(env, regno); ++ ++ /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */ ++ return reg->type == PTR_TO_FLOW_KEYS; ++} ++ ++static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg, ++ int off, int size, bool strict) ++{ ++ struct tnum reg_off; ++ int ip_align; ++ ++ /* Byte size accesses are always allowed. */ ++ if (!strict || size == 1) ++ return 0; ++ ++ /* For platforms that do not have a Kconfig enabling ++ * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of ++ * NET_IP_ALIGN is universally set to '2'. 
And on platforms ++ * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get ++ * to this code only in strict mode where we want to emulate ++ * the NET_IP_ALIGN==2 checking. Therefore use an ++ * unconditional IP align value of '2'. ++ */ ++ ip_align = 2; ++ ++ reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off)); ++ if (!tnum_is_aligned(reg_off, size)) { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, ++ "misaligned packet access off %d+%s+%d+%d size %d\n", ++ ip_align, tn_buf, reg->off, off, size); ++ return -EACCES; ++ } ++ ++ return 0; ++} ++ ++static int check_generic_ptr_alignment(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg, ++ const char *pointer_desc, ++ int off, int size, bool strict) ++{ ++ struct tnum reg_off; ++ ++ /* Byte size accesses are always allowed. */ ++ if (!strict || size == 1) ++ return 0; ++ ++ reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off)); ++ if (!tnum_is_aligned(reg_off, size)) { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", ++ pointer_desc, tn_buf, reg->off, off, size); ++ return -EACCES; ++ } ++ ++ return 0; ++} ++ ++static int check_ptr_alignment(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg, int off, ++ int size, bool strict_alignment_once) ++{ ++ bool strict = env->strict_alignment || strict_alignment_once; ++ const char *pointer_desc = ""; ++ ++ switch (reg->type) { ++ case PTR_TO_PACKET: ++ case PTR_TO_PACKET_META: ++ /* Special case, because of NET_IP_ALIGN. Given metadata sits ++ * right in front, treat it the very same way. ++ */ ++ return check_pkt_ptr_alignment(env, reg, off, size, strict); ++ case PTR_TO_FLOW_KEYS: ++ pointer_desc = "flow keys "; ++ break; ++ case PTR_TO_MAP_VALUE: ++ pointer_desc = "value "; ++ break; ++ case PTR_TO_CTX: ++ pointer_desc = "context "; ++ break; ++ case PTR_TO_STACK: ++ pointer_desc = "stack "; ++ /* The stack spill tracking logic in check_stack_write() ++ * and check_stack_read() relies on stack accesses being ++ * aligned. ++ */ ++ strict = true; ++ break; ++ case PTR_TO_SOCKET: ++ pointer_desc = "sock "; ++ break; ++ case PTR_TO_SOCK_COMMON: ++ pointer_desc = "sock_common "; ++ break; ++ case PTR_TO_TCP_SOCK: ++ pointer_desc = "tcp_sock "; ++ break; ++ case PTR_TO_XDP_SOCK: ++ pointer_desc = "xdp_sock "; ++ break; ++ default: ++ break; ++ } ++ return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, ++ strict); ++} ++ ++static int update_stack_depth(struct bpf_verifier_env *env, ++ const struct bpf_func_state *func, ++ int off) ++{ ++ u16 stack = env->subprog_info[func->subprogno].stack_depth; ++ ++ if (stack >= -off) ++ return 0; ++ ++ /* update known max for given subprogram */ ++ env->subprog_info[func->subprogno].stack_depth = -off; ++ return 0; ++} ++ ++/* starting from main bpf function walk all instructions of the function ++ * and recursively walk all callees that given function can call. ++ * Ignore jump and exit insns. 
++ * Since recursion is prevented by check_cfg() this algorithm ++ * only needs a local stack of MAX_CALL_FRAMES to remember callsites ++ */ ++static int check_max_stack_depth(struct bpf_verifier_env *env) ++{ ++ int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; ++ struct bpf_subprog_info *subprog = env->subprog_info; ++ struct bpf_insn *insn = env->prog->insnsi; ++ int ret_insn[MAX_CALL_FRAMES]; ++ int ret_prog[MAX_CALL_FRAMES]; ++ ++process_func: ++ /* protect against potential stack overflow that might happen when ++ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack ++ * depth for such case down to 256 so that the worst case scenario ++ * would result in 8k stack size (32 which is tailcall limit * 256 = ++ * 8k). ++ * ++ * To get the idea what might happen, see an example: ++ * func1 -> sub rsp, 128 ++ * subfunc1 -> sub rsp, 256 ++ * tailcall1 -> add rsp, 256 ++ * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320) ++ * subfunc2 -> sub rsp, 64 ++ * subfunc22 -> sub rsp, 128 ++ * tailcall2 -> add rsp, 128 ++ * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416) ++ * ++ * tailcall will unwind the current stack frame but it will not get rid ++ * of caller's stack as shown on the example above. ++ */ ++ if (idx && subprog[idx].has_tail_call && depth >= 256) { ++ verbose(env, ++ "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n", ++ depth); ++ return -EACCES; ++ } ++ /* round up to 32-bytes, since this is granularity ++ * of interpreter stack size ++ */ ++ depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); ++ if (depth > MAX_BPF_STACK) { ++ verbose(env, "combined stack size of %d calls is %d. Too large\n", ++ frame + 1, depth); ++ return -EACCES; ++ } ++continue_func: ++ subprog_end = subprog[idx + 1].start; ++ for (; i < subprog_end; i++) { ++ if (insn[i].code != (BPF_JMP | BPF_CALL)) ++ continue; ++ if (insn[i].src_reg != BPF_PSEUDO_CALL) ++ continue; ++ /* remember insn and function to return to */ ++ ret_insn[frame] = i + 1; ++ ret_prog[frame] = idx; ++ ++ /* find the callee */ ++ i = i + insn[i].imm + 1; ++ idx = find_subprog(env, i); ++ if (idx < 0) { ++ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", ++ i); ++ return -EFAULT; ++ } ++ frame++; ++ if (frame >= MAX_CALL_FRAMES) { ++ verbose(env, "the call stack of %d frames is too deep !\n", ++ frame); ++ return -E2BIG; ++ } ++ goto process_func; ++ } ++ /* end of for() loop means the last insn of the 'subprog' ++ * was reached. Doesn't matter whether it was JA or EXIT ++ */ ++ if (frame == 0) ++ return 0; ++ depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); ++ frame--; ++ i = ret_insn[frame]; ++ idx = ret_prog[frame]; ++ goto continue_func; ++} ++ ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON ++static int get_callee_stack_depth(struct bpf_verifier_env *env, ++ const struct bpf_insn *insn, int idx) ++{ ++ int start = idx + insn->imm + 1, subprog; ++ ++ subprog = find_subprog(env, start); ++ if (subprog < 0) { ++ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", ++ start); ++ return -EFAULT; + } ++ return env->subprog_info[subprog].stack_depth; ++} ++#endif ++ ++static int check_ctx_reg(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg, int regno) ++{ ++ /* Access to ctx or passing it to a helper is only allowed in ++ * its original, unmodified form. 
++ */ ++ ++ if (reg->off) { ++ verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", ++ regno, reg->off); ++ return -EACCES; ++ } ++ ++ if (!tnum_is_const(reg->var_off) || reg->var_off.value) { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); ++ return -EACCES; ++ } ++ ++ return 0; ++} ++ ++static int check_tp_buffer_access(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg, ++ int regno, int off, int size) ++{ ++ if (off < 0) { ++ verbose(env, ++ "R%d invalid tracepoint buffer access: off=%d, size=%d", ++ regno, off, size); ++ return -EACCES; ++ } ++ if (!tnum_is_const(reg->var_off) || reg->var_off.value) { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, ++ "R%d invalid variable buffer offset: off=%d, var_off=%s", ++ regno, off, tn_buf); ++ return -EACCES; ++ } ++ if (off + size > env->prog->aux->max_tp_access) ++ env->prog->aux->max_tp_access = off + size; ++ ++ return 0; ++} ++ ++ ++/* truncate register to smaller size (in bytes) ++ * must be called with size < BPF_REG_SIZE ++ */ ++static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) ++{ ++ u64 mask; ++ ++ /* clear high bits in bit representation */ ++ reg->var_off = tnum_cast(reg->var_off, size); ++ ++ /* fix arithmetic bounds */ ++ mask = ((u64)1 << (size * 8)) - 1; ++ if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { ++ reg->umin_value &= mask; ++ reg->umax_value &= mask; ++ } else { ++ reg->umin_value = 0; ++ reg->umax_value = mask; ++ } ++ reg->smin_value = reg->umin_value; ++ reg->smax_value = reg->umax_value; + } + + /* check whether memory at (regno + off) is accessible for t = (read | write) +@@ -680,225 +2776,692 @@ static bool is_pointer_value(struct veri + * if t==write && value_regno==-1, some unknown value is stored into memory + * if t==read && value_regno==-1, don't care what we read from memory + */ +-static int check_mem_access(struct verifier_env *env, u32 regno, int off, +- int bpf_size, enum bpf_access_type t, +- int value_regno) ++static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, ++ int off, int bpf_size, enum bpf_access_type t, ++ int value_regno, bool strict_alignment_once) + { +- struct verifier_state *state = &env->cur_state; ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_reg_state *reg = regs + regno; ++ struct bpf_func_state *state; + int size, err = 0; + +- if (state->regs[regno].type == PTR_TO_STACK) +- off += state->regs[regno].imm; +- + size = bpf_size_to_bytes(bpf_size); + if (size < 0) + return size; + +- if (off % size != 0) { +- verbose("misaligned access off %d size %d\n", off, size); +- return -EACCES; +- } ++ /* alignment checks will add in reg->off themselves */ ++ err = check_ptr_alignment(env, reg, off, size, strict_alignment_once); ++ if (err) ++ return err; ++ ++ /* for access checks, reg->off is just part of off */ ++ off += reg->off; + +- if (state->regs[regno].type == PTR_TO_MAP_VALUE) { ++ if (reg->type == PTR_TO_MAP_VALUE) { + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { +- verbose("R%d leaks addr into map\n", value_regno); ++ verbose(env, "R%d leaks addr into map\n", value_regno); + return -EACCES; + } +- err = check_map_access(env, regno, off, size); ++ err = check_map_access_type(env, regno, off, size, t); ++ if (err) ++ return err; ++ err = check_map_access(env, regno, off, size, false); + if (!err && t == 
BPF_READ && value_regno >= 0) +- mark_reg_unknown_value(state->regs, value_regno); ++ mark_reg_unknown(env, regs, value_regno); ++ ++ } else if (reg->type == PTR_TO_CTX) { ++ enum bpf_reg_type reg_type = SCALAR_VALUE; + +- } else if (state->regs[regno].type == PTR_TO_CTX) { + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { +- verbose("R%d leaks addr into ctx\n", value_regno); ++ verbose(env, "R%d leaks addr into ctx\n", value_regno); + return -EACCES; + } +- err = check_ctx_access(env, off, size, t); +- if (!err && t == BPF_READ && value_regno >= 0) +- mark_reg_unknown_value(state->regs, value_regno); + +- } else if (state->regs[regno].type == FRAME_PTR || +- state->regs[regno].type == PTR_TO_STACK) { +- if (off >= 0 || off < -MAX_BPF_STACK) { +- verbose("invalid stack off=%d size=%d\n", off, size); ++ err = check_ctx_reg(env, reg, regno); ++ if (err < 0) ++ return err; ++ ++ err = check_ctx_access(env, insn_idx, off, size, t, ®_type); ++ if (!err && t == BPF_READ && value_regno >= 0) { ++ /* ctx access returns either a scalar, or a ++ * PTR_TO_PACKET[_META,_END]. In the latter ++ * case, we know the offset is zero. ++ */ ++ if (reg_type == SCALAR_VALUE) { ++ mark_reg_unknown(env, regs, value_regno); ++ } else { ++ mark_reg_known_zero(env, regs, ++ value_regno); ++ if (reg_type_may_be_null(reg_type)) ++ regs[value_regno].id = ++env->id_gen; ++ /* A load of ctx field could have different ++ * actual load size with the one encoded in the ++ * insn. When the dst is PTR, it is for sure not ++ * a sub-register. ++ */ ++ regs[value_regno].subreg_def = DEF_NOT_SUBREG; ++ } ++ regs[value_regno].type = reg_type; ++ } ++ ++ } else if (reg->type == PTR_TO_STACK) { ++ off += reg->var_off.value; ++ err = check_stack_access(env, reg, off, size); ++ if (err) ++ return err; ++ ++ state = func(env, reg); ++ err = update_stack_depth(env, state, off); ++ if (err) ++ return err; ++ ++ if (t == BPF_WRITE) ++ err = check_stack_write(env, state, off, size, ++ value_regno, insn_idx); ++ else ++ err = check_stack_read(env, state, off, size, ++ value_regno); ++ } else if (reg_is_pkt_pointer(reg)) { ++ if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { ++ verbose(env, "cannot write into packet\n"); ++ return -EACCES; ++ } ++ if (t == BPF_WRITE && value_regno >= 0 && ++ is_pointer_value(env, value_regno)) { ++ verbose(env, "R%d leaks addr into packet\n", ++ value_regno); ++ return -EACCES; ++ } ++ err = check_packet_access(env, regno, off, size, false); ++ if (!err && t == BPF_READ && value_regno >= 0) ++ mark_reg_unknown(env, regs, value_regno); ++ } else if (reg->type == PTR_TO_FLOW_KEYS) { ++ if (t == BPF_WRITE && value_regno >= 0 && ++ is_pointer_value(env, value_regno)) { ++ verbose(env, "R%d leaks addr into flow keys\n", ++ value_regno); + return -EACCES; + } ++ ++ err = check_flow_keys_access(env, off, size); ++ if (!err && t == BPF_READ && value_regno >= 0) ++ mark_reg_unknown(env, regs, value_regno); ++ } else if (type_is_sk_pointer(reg->type)) { + if (t == BPF_WRITE) { +- if (!env->allow_ptr_leaks && +- state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL && +- size != BPF_REG_SIZE) { +- verbose("attempt to corrupt spilled pointer on stack\n"); +- return -EACCES; +- } +- err = check_stack_write(state, off, size, value_regno); +- } else { +- err = check_stack_read(state, off, size, value_regno); ++ verbose(env, "R%d cannot write into %s\n", ++ regno, reg_type_str[reg->type]); ++ return -EACCES; + } ++ err = check_sock_access(env, insn_idx, regno, off, 
size, t); ++ if (!err && value_regno >= 0) ++ mark_reg_unknown(env, regs, value_regno); ++ } else if (reg->type == PTR_TO_TP_BUFFER) { ++ err = check_tp_buffer_access(env, reg, regno, off, size); ++ if (!err && t == BPF_READ && value_regno >= 0) ++ mark_reg_unknown(env, regs, value_regno); + } else { +- verbose("R%d invalid mem access '%s'\n", +- regno, reg_type_str[state->regs[regno].type]); ++ verbose(env, "R%d invalid mem access '%s'\n", regno, ++ reg_type_str[reg->type]); + return -EACCES; + } ++ ++ if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && ++ regs[value_regno].type == SCALAR_VALUE) { ++ /* b/h/w load zero-extends, mark upper bits as known 0 */ ++ coerce_reg_to_size(®s[value_regno], size); ++ } + return err; + } + +-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) ++static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) + { +- struct reg_state *regs = env->cur_state.regs; + int err; + + if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || + insn->imm != 0) { +- verbose("BPF_XADD uses reserved fields\n"); ++ verbose(env, "BPF_XADD uses reserved fields\n"); + return -EINVAL; + } + + /* check src1 operand */ +- err = check_reg_arg(regs, insn->src_reg, SRC_OP); ++ err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + + /* check src2 operand */ +- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); ++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); + if (err) + return err; + ++ if (is_pointer_value(env, insn->src_reg)) { ++ verbose(env, "R%d leaks addr into mem\n", insn->src_reg); ++ return -EACCES; ++ } ++ ++ if (is_ctx_reg(env, insn->dst_reg) || ++ is_pkt_reg(env, insn->dst_reg) || ++ is_flow_key_reg(env, insn->dst_reg) || ++ is_sk_reg(env, insn->dst_reg)) { ++ verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", ++ insn->dst_reg, ++ reg_type_str[reg_state(env, insn->dst_reg)->type]); ++ return -EACCES; ++ } ++ + /* check whether atomic_add can read the memory */ +- err = check_mem_access(env, insn->dst_reg, insn->off, +- BPF_SIZE(insn->code), BPF_READ, -1); ++ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, ++ BPF_SIZE(insn->code), BPF_READ, -1, true); + if (err) + return err; + + /* check whether atomic_add can write into the same memory */ +- return check_mem_access(env, insn->dst_reg, insn->off, +- BPF_SIZE(insn->code), BPF_WRITE, -1); ++ return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, ++ BPF_SIZE(insn->code), BPF_WRITE, -1, true); ++} ++ ++static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, ++ int off, int access_size, ++ bool zero_size_allowed) ++{ ++ struct bpf_reg_state *reg = reg_state(env, regno); ++ ++ if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || ++ access_size < 0 || (access_size == 0 && !zero_size_allowed)) { ++ if (tnum_is_const(reg->var_off)) { ++ verbose(env, "invalid stack type R%d off=%d access_size=%d\n", ++ regno, off, access_size); ++ } else { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n", ++ regno, tn_buf, access_size); ++ } ++ return -EACCES; ++ } ++ return 0; + } + + /* when register 'regno' is passed into function that will read 'access_size' + * bytes from that pointer, make sure that it's within stack boundary +- * and all elements of stack are initialized ++ * and all elements of stack are initialized. 
++ * Unlike most pointer bounds-checking functions, this one doesn't take an ++ * 'off' argument, so it has to add in reg->off itself. + */ +-static int check_stack_boundary(struct verifier_env *env, +- int regno, int access_size) ++static int check_stack_boundary(struct bpf_verifier_env *env, int regno, ++ int access_size, bool zero_size_allowed, ++ struct bpf_call_arg_meta *meta) + { +- struct verifier_state *state = &env->cur_state; +- struct reg_state *regs = state->regs; +- int off, i; ++ struct bpf_reg_state *reg = reg_state(env, regno); ++ struct bpf_func_state *state = func(env, reg); ++ int err, min_off, max_off, i, j, slot, spi; ++ ++ if (reg->type != PTR_TO_STACK) { ++ /* Allow zero-byte read from NULL, regardless of pointer type */ ++ if (zero_size_allowed && access_size == 0 && ++ register_is_null(reg)) ++ return 0; + +- if (regs[regno].type != PTR_TO_STACK) ++ verbose(env, "R%d type=%s expected=%s\n", regno, ++ reg_type_str[reg->type], ++ reg_type_str[PTR_TO_STACK]); + return -EACCES; ++ } + +- off = regs[regno].imm; +- if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || +- access_size <= 0) { +- verbose("invalid stack type R%d off=%d access_size=%d\n", +- regno, off, access_size); ++ if (tnum_is_const(reg->var_off)) { ++ min_off = max_off = reg->var_off.value + reg->off; ++ err = __check_stack_boundary(env, regno, min_off, access_size, ++ zero_size_allowed); ++ if (err) ++ return err; ++ } else { ++ /* Variable offset is prohibited for unprivileged mode for ++ * simplicity since it requires corresponding support in ++ * Spectre masking for stack ALU. ++ * See also retrieve_ptr_limit(). ++ */ ++ if (!env->allow_ptr_leaks) { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", ++ regno, tn_buf); ++ return -EACCES; ++ } ++ /* Only initialized buffer on stack is allowed to be accessed ++ * with variable offset. With uninitialized buffer it's hard to ++ * guarantee that whole memory is marked as initialized on ++ * helper return since specific bounds are unknown what may ++ * cause uninitialized stack leaking. 
++ */ ++ if (meta && meta->raw_mode) ++ meta = NULL; ++ ++ if (reg->smax_value >= BPF_MAX_VAR_OFF || ++ reg->smax_value <= -BPF_MAX_VAR_OFF) { ++ verbose(env, "R%d unbounded indirect variable offset stack access\n", ++ regno); ++ return -EACCES; ++ } ++ min_off = reg->smin_value + reg->off; ++ max_off = reg->smax_value + reg->off; ++ err = __check_stack_boundary(env, regno, min_off, access_size, ++ zero_size_allowed); ++ if (err) { ++ verbose(env, "R%d min value is outside of stack bound\n", ++ regno); ++ return err; ++ } ++ err = __check_stack_boundary(env, regno, max_off, access_size, ++ zero_size_allowed); ++ if (err) { ++ verbose(env, "R%d max value is outside of stack bound\n", ++ regno); ++ return err; ++ } ++ } ++ ++ if (meta && meta->raw_mode) { ++ meta->access_size = access_size; ++ meta->regno = regno; ++ return 0; ++ } ++ ++ for (i = min_off; i < max_off + access_size; i++) { ++ u8 *stype; ++ ++ slot = -i - 1; ++ spi = slot / BPF_REG_SIZE; ++ if (state->allocated_stack <= slot) ++ goto err; ++ stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; ++ if (*stype == STACK_MISC) ++ goto mark; ++ if (*stype == STACK_ZERO) { ++ /* helper can write anything into the stack */ ++ *stype = STACK_MISC; ++ goto mark; ++ } ++ if (state->stack[spi].slot_type[0] == STACK_SPILL && ++ state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { ++ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); ++ for (j = 0; j < BPF_REG_SIZE; j++) ++ state->stack[spi].slot_type[j] = STACK_MISC; ++ goto mark; ++ } ++ ++err: ++ if (tnum_is_const(reg->var_off)) { ++ verbose(env, "invalid indirect read from stack off %d+%d size %d\n", ++ min_off, i - min_off, access_size); ++ } else { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", ++ tn_buf, i - min_off, access_size); ++ } + return -EACCES; ++mark: ++ /* reading any byte out of 8-byte 'spill_slot' will cause ++ * the whole slot to be marked as 'read' ++ */ ++ mark_reg_read(env, &state->stack[spi].spilled_ptr, ++ state->stack[spi].spilled_ptr.parent, ++ REG_LIVE_READ64); + } ++ return update_stack_depth(env, state, min_off); ++} ++ ++static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, ++ int access_size, bool zero_size_allowed, ++ struct bpf_call_arg_meta *meta) ++{ ++ struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + +- for (i = 0; i < access_size; i++) { +- if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { +- verbose("invalid indirect read from stack off %d+%d size %d\n", +- off, i, access_size); ++ switch (reg->type) { ++ case PTR_TO_PACKET: ++ case PTR_TO_PACKET_META: ++ return check_packet_access(env, regno, reg->off, access_size, ++ zero_size_allowed); ++ case PTR_TO_MAP_VALUE: ++ if (check_map_access_type(env, regno, reg->off, access_size, ++ meta && meta->raw_mode ? BPF_WRITE : ++ BPF_READ)) + return -EACCES; ++ return check_map_access(env, regno, reg->off, access_size, ++ zero_size_allowed); ++ default: /* scalar_value|ptr_to_stack or invalid ptr */ ++ return check_stack_boundary(env, regno, access_size, ++ zero_size_allowed, meta); ++ } ++} ++ ++/* Implementation details: ++ * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL ++ * Two bpf_map_lookups (even with the same key) will have different reg->id. 
++ * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after ++ * value_or_null->value transition, since the verifier only cares about ++ * the range of access to valid map value pointer and doesn't care about actual ++ * address of the map element. ++ * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps ++ * reg->id > 0 after value_or_null->value transition. By doing so ++ * two bpf_map_lookups will be considered two different pointers that ++ * point to different bpf_spin_locks. ++ * The verifier allows taking only one bpf_spin_lock at a time to avoid ++ * dead-locks. ++ * Since only one bpf_spin_lock is allowed the checks are simpler than ++ * reg_is_refcounted() logic. The verifier needs to remember only ++ * one spin_lock instead of array of acquired_refs. ++ * cur_state->active_spin_lock remembers which map value element got locked ++ * and clears it after bpf_spin_unlock. ++ */ ++static int process_spin_lock(struct bpf_verifier_env *env, int regno, ++ bool is_lock) ++{ ++ struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; ++ struct bpf_verifier_state *cur = env->cur_state; ++ bool is_const = tnum_is_const(reg->var_off); ++ struct bpf_map *map = reg->map_ptr; ++ u64 val = reg->var_off.value; ++ ++ if (reg->type != PTR_TO_MAP_VALUE) { ++ verbose(env, "R%d is not a pointer to map_value\n", regno); ++ return -EINVAL; ++ } ++ if (!is_const) { ++ verbose(env, ++ "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", ++ regno); ++ return -EINVAL; ++ } ++ if (!map->btf) { ++ verbose(env, ++ "map '%s' has to have BTF in order to use bpf_spin_lock\n", ++ map->name); ++ return -EINVAL; ++ } ++ if (!map_value_has_spin_lock(map)) { ++ if (map->spin_lock_off == -E2BIG) ++ verbose(env, ++ "map '%s' has more than one 'struct bpf_spin_lock'\n", ++ map->name); ++ else if (map->spin_lock_off == -ENOENT) ++ verbose(env, ++ "map '%s' doesn't have 'struct bpf_spin_lock'\n", ++ map->name); ++ else ++ verbose(env, ++ "map '%s' is not a struct type or bpf_spin_lock is mangled\n", ++ map->name); ++ return -EINVAL; ++ } ++ if (map->spin_lock_off != val + reg->off) { ++ verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", ++ val + reg->off); ++ return -EINVAL; ++ } ++ if (is_lock) { ++ if (cur->active_spin_lock) { ++ verbose(env, ++ "Locking two bpf_spin_locks are not allowed\n"); ++ return -EINVAL; ++ } ++ cur->active_spin_lock = reg->id; ++ } else { ++ if (!cur->active_spin_lock) { ++ verbose(env, "bpf_spin_unlock without taking a lock\n"); ++ return -EINVAL; + } ++ if (cur->active_spin_lock != reg->id) { ++ verbose(env, "bpf_spin_unlock of different lock\n"); ++ return -EINVAL; ++ } ++ cur->active_spin_lock = 0; + } + return 0; + } + +-static int check_func_arg(struct verifier_env *env, u32 regno, +- enum bpf_arg_type arg_type, struct bpf_map **mapp) ++static bool arg_type_is_mem_ptr(enum bpf_arg_type type) ++{ ++ return type == ARG_PTR_TO_MEM || ++ type == ARG_PTR_TO_MEM_OR_NULL || ++ type == ARG_PTR_TO_UNINIT_MEM; ++} ++ ++static bool arg_type_is_mem_size(enum bpf_arg_type type) ++{ ++ return type == ARG_CONST_SIZE || ++ type == ARG_CONST_SIZE_OR_ZERO; ++} ++ ++static bool arg_type_is_int_ptr(enum bpf_arg_type type) ++{ ++ return type == ARG_PTR_TO_INT || ++ type == ARG_PTR_TO_LONG; ++} ++ ++static int int_ptr_type_to_size(enum bpf_arg_type type) ++{ ++ if (type == ARG_PTR_TO_INT) ++ return sizeof(u32); ++ else if (type == ARG_PTR_TO_LONG) ++ return sizeof(u64); ++ ++ return -EINVAL; ++} ++ ++static int 
check_func_arg(struct bpf_verifier_env *env, u32 regno, ++ enum bpf_arg_type arg_type, ++ struct bpf_call_arg_meta *meta) + { +- struct reg_state *reg = env->cur_state.regs + regno; +- enum bpf_reg_type expected_type; ++ struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; ++ enum bpf_reg_type expected_type, type = reg->type; + int err = 0; + + if (arg_type == ARG_DONTCARE) + return 0; + +- if (reg->type == NOT_INIT) { +- verbose("R%d !read_ok\n", regno); +- return -EACCES; +- } ++ err = check_reg_arg(env, regno, SRC_OP); ++ if (err) ++ return err; + + if (arg_type == ARG_ANYTHING) { + if (is_pointer_value(env, regno)) { +- verbose("R%d leaks addr into helper function\n", regno); ++ verbose(env, "R%d leaks addr into helper function\n", ++ regno); + return -EACCES; + } + return 0; + } + +- if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY || +- arg_type == ARG_PTR_TO_MAP_VALUE) { ++ if (type_is_pkt_pointer(type) && ++ !may_access_direct_pkt_data(env, meta, BPF_READ)) { ++ verbose(env, "helper access to the packet is not allowed\n"); ++ return -EACCES; ++ } ++ ++ if (arg_type == ARG_PTR_TO_MAP_KEY || ++ arg_type == ARG_PTR_TO_MAP_VALUE || ++ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || ++ arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { + expected_type = PTR_TO_STACK; +- } else if (arg_type == ARG_CONST_STACK_SIZE) { +- expected_type = CONST_IMM; ++ if (register_is_null(reg) && ++ arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) ++ /* final test in check_stack_boundary() */; ++ else if (!type_is_pkt_pointer(type) && ++ type != PTR_TO_MAP_VALUE && ++ type != expected_type) ++ goto err_type; ++ } else if (arg_type == ARG_CONST_SIZE || ++ arg_type == ARG_CONST_SIZE_OR_ZERO) { ++ expected_type = SCALAR_VALUE; ++ if (type != expected_type) ++ goto err_type; + } else if (arg_type == ARG_CONST_MAP_PTR) { + expected_type = CONST_PTR_TO_MAP; ++ if (type != expected_type) ++ goto err_type; + } else if (arg_type == ARG_PTR_TO_CTX) { + expected_type = PTR_TO_CTX; ++ if (type != expected_type) ++ goto err_type; ++ err = check_ctx_reg(env, reg, regno); ++ if (err < 0) ++ return err; ++ } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) { ++ expected_type = PTR_TO_SOCK_COMMON; ++ /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ ++ if (!type_is_sk_pointer(type)) ++ goto err_type; ++ if (reg->ref_obj_id) { ++ if (meta->ref_obj_id) { ++ verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", ++ regno, reg->ref_obj_id, ++ meta->ref_obj_id); ++ return -EFAULT; ++ } ++ meta->ref_obj_id = reg->ref_obj_id; ++ } ++ } else if (arg_type == ARG_PTR_TO_SOCKET) { ++ expected_type = PTR_TO_SOCKET; ++ if (type != expected_type) ++ goto err_type; ++ } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { ++ if (meta->func_id == BPF_FUNC_spin_lock) { ++ if (process_spin_lock(env, regno, true)) ++ return -EACCES; ++ } else if (meta->func_id == BPF_FUNC_spin_unlock) { ++ if (process_spin_lock(env, regno, false)) ++ return -EACCES; ++ } else { ++ verbose(env, "verifier internal error\n"); ++ return -EFAULT; ++ } ++ } else if (arg_type_is_mem_ptr(arg_type)) { ++ expected_type = PTR_TO_STACK; ++ /* One exception here. In case function allows for NULL to be ++ * passed in as argument, it's a SCALAR_VALUE type. Final test ++ * happens during stack boundary checking. 
++ */ ++ if (register_is_null(reg) && ++ arg_type == ARG_PTR_TO_MEM_OR_NULL) ++ /* final test in check_stack_boundary() */; ++ else if (!type_is_pkt_pointer(type) && ++ type != PTR_TO_MAP_VALUE && ++ type != expected_type) ++ goto err_type; ++ meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; ++ } else if (arg_type_is_int_ptr(arg_type)) { ++ expected_type = PTR_TO_STACK; ++ if (!type_is_pkt_pointer(type) && ++ type != PTR_TO_MAP_VALUE && ++ type != expected_type) ++ goto err_type; + } else { +- verbose("unsupported arg_type %d\n", arg_type); ++ verbose(env, "unsupported arg_type %d\n", arg_type); + return -EFAULT; + } + +- if (reg->type != expected_type) { +- verbose("R%d type=%s expected=%s\n", regno, +- reg_type_str[reg->type], reg_type_str[expected_type]); +- return -EACCES; +- } +- + if (arg_type == ARG_CONST_MAP_PTR) { + /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ +- *mapp = reg->map_ptr; +- ++ meta->map_ptr = reg->map_ptr; + } else if (arg_type == ARG_PTR_TO_MAP_KEY) { + /* bpf_map_xxx(..., map_ptr, ..., key) call: + * check that [key, key + map->key_size) are within + * stack limits and initialized + */ +- if (!*mapp) { ++ if (!meta->map_ptr) { + /* in function declaration map_ptr must come before + * map_key, so that it's verified and known before + * we have to check map_key here. Otherwise it means + * that kernel subsystem misconfigured verifier + */ +- verbose("invalid map_ptr to access map->key\n"); ++ verbose(env, "invalid map_ptr to access map->key\n"); + return -EACCES; + } +- err = check_stack_boundary(env, regno, (*mapp)->key_size); +- +- } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { ++ err = check_helper_mem_access(env, regno, ++ meta->map_ptr->key_size, false, ++ NULL); ++ } else if (arg_type == ARG_PTR_TO_MAP_VALUE || ++ (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && ++ !register_is_null(reg)) || ++ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { + /* bpf_map_xxx(..., map_ptr, ..., value) call: + * check [value, value + map->value_size) validity + */ +- if (!*mapp) { ++ if (!meta->map_ptr) { + /* kernel subsystem misconfigured verifier */ +- verbose("invalid map_ptr to access map->value\n"); ++ verbose(env, "invalid map_ptr to access map->value\n"); + return -EACCES; + } +- err = check_stack_boundary(env, regno, (*mapp)->value_size); ++ meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); ++ err = check_helper_mem_access(env, regno, ++ meta->map_ptr->value_size, false, ++ meta); ++ } else if (arg_type_is_mem_size(arg_type)) { ++ bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); + +- } else if (arg_type == ARG_CONST_STACK_SIZE) { +- /* bpf_xxx(..., buf, len) call will access 'len' bytes +- * from stack pointer 'buf'. Check it +- * note: regno == len, regno - 1 == buf ++ /* remember the mem_size which may be used later ++ * to refine return values. + */ +- if (regno == 0) { +- /* kernel subsystem misconfigured verifier */ +- verbose("ARG_CONST_STACK_SIZE cannot be first argument\n"); ++ meta->msize_max_value = reg->umax_value; ++ ++ /* The register is SCALAR_VALUE; the access check ++ * happens using its boundaries. ++ */ ++ if (!tnum_is_const(reg->var_off)) ++ /* For unprivileged variable accesses, disable raw ++ * mode so that the program is required to ++ * initialize all the memory that the helper could ++ * just partially fill up. 
++ */ ++ meta = NULL; ++ ++ if (reg->smin_value < 0) { ++ verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", ++ regno); ++ return -EACCES; ++ } ++ ++ if (reg->umin_value == 0) { ++ err = check_helper_mem_access(env, regno - 1, 0, ++ zero_size_allowed, ++ meta); ++ if (err) ++ return err; ++ } ++ ++ if (reg->umax_value >= BPF_MAX_VAR_SIZ) { ++ verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", ++ regno); + return -EACCES; + } +- err = check_stack_boundary(env, regno - 1, reg->imm); ++ err = check_helper_mem_access(env, regno - 1, ++ reg->umax_value, ++ zero_size_allowed, meta); ++ if (!err) ++ err = mark_chain_precision(env, regno); ++ } else if (arg_type_is_int_ptr(arg_type)) { ++ int size = int_ptr_type_to_size(arg_type); ++ ++ err = check_helper_mem_access(env, regno, size, false, meta); ++ if (err) ++ return err; ++ err = check_ptr_alignment(env, reg, 0, size, true); + } + + return err; ++err_type: ++ verbose(env, "R%d type=%s expected=%s\n", regno, ++ reg_type_str[type], reg_type_str[expected_type]); ++ return -EACCES; + } + +-static int check_map_func_compatibility(struct bpf_map *map, int func_id) ++static int check_map_func_compatibility(struct bpf_verifier_env *env, ++ struct bpf_map *map, int func_id) + { + if (!map) + return 0; +@@ -911,7 +3474,74 @@ static int check_map_func_compatibility( + break; + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + if (func_id != BPF_FUNC_perf_event_read && +- func_id != BPF_FUNC_perf_event_output) ++ func_id != BPF_FUNC_perf_event_output && ++ func_id != BPF_FUNC_perf_event_read_value) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_STACK_TRACE: ++ if (func_id != BPF_FUNC_get_stackid) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_CGROUP_ARRAY: ++ if (func_id != BPF_FUNC_skb_under_cgroup && ++ func_id != BPF_FUNC_current_task_under_cgroup) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_CGROUP_STORAGE: ++ if (func_id != BPF_FUNC_get_local_storage) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_DEVMAP: ++ case BPF_MAP_TYPE_DEVMAP_HASH: ++ if (func_id != BPF_FUNC_redirect_map && ++ func_id != BPF_FUNC_map_lookup_elem) ++ goto error; ++ break; ++ /* Restrict bpf side of cpumap and xskmap, open when use-cases ++ * appear. 
++ */ ++ case BPF_MAP_TYPE_CPUMAP: ++ if (func_id != BPF_FUNC_redirect_map) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_XSKMAP: ++ if (func_id != BPF_FUNC_redirect_map && ++ func_id != BPF_FUNC_map_lookup_elem) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_ARRAY_OF_MAPS: ++ case BPF_MAP_TYPE_HASH_OF_MAPS: ++ if (func_id != BPF_FUNC_map_lookup_elem) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_SOCKMAP: ++ if (func_id != BPF_FUNC_sk_redirect_map && ++ func_id != BPF_FUNC_sock_map_update && ++ func_id != BPF_FUNC_map_delete_elem && ++ func_id != BPF_FUNC_msg_redirect_map) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_SOCKHASH: ++ if (func_id != BPF_FUNC_sk_redirect_hash && ++ func_id != BPF_FUNC_sock_hash_update && ++ func_id != BPF_FUNC_map_delete_elem && ++ func_id != BPF_FUNC_msg_redirect_hash) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: ++ if (func_id != BPF_FUNC_sk_select_reuseport) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_QUEUE: ++ case BPF_MAP_TYPE_STACK: ++ if (func_id != BPF_FUNC_map_peek_elem && ++ func_id != BPF_FUNC_map_pop_elem && ++ func_id != BPF_FUNC_map_push_elem) ++ goto error; ++ break; ++ case BPF_MAP_TYPE_SK_STORAGE: ++ if (func_id != BPF_FUNC_sk_storage_get && ++ func_id != BPF_FUNC_sk_storage_delete) + goto error; + break; + default: +@@ -923,109 +3553,1579 @@ static int check_map_func_compatibility( + case BPF_FUNC_tail_call: + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + goto error; ++ if (env->subprog_cnt > 1) { ++ verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); ++ return -EINVAL; ++ } + break; + case BPF_FUNC_perf_event_read: + case BPF_FUNC_perf_event_output: ++ case BPF_FUNC_perf_event_read_value: + if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) + goto error; + break; ++ case BPF_FUNC_get_stackid: ++ if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) ++ goto error; ++ break; ++ case BPF_FUNC_current_task_under_cgroup: ++ case BPF_FUNC_skb_under_cgroup: ++ if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) ++ goto error; ++ break; ++ case BPF_FUNC_redirect_map: ++ if (map->map_type != BPF_MAP_TYPE_DEVMAP && ++ map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && ++ map->map_type != BPF_MAP_TYPE_CPUMAP && ++ map->map_type != BPF_MAP_TYPE_XSKMAP) ++ goto error; ++ break; ++ case BPF_FUNC_sk_redirect_map: ++ case BPF_FUNC_msg_redirect_map: ++ case BPF_FUNC_sock_map_update: ++ if (map->map_type != BPF_MAP_TYPE_SOCKMAP) ++ goto error; ++ break; ++ case BPF_FUNC_sk_redirect_hash: ++ case BPF_FUNC_msg_redirect_hash: ++ case BPF_FUNC_sock_hash_update: ++ if (map->map_type != BPF_MAP_TYPE_SOCKHASH) ++ goto error; ++ break; ++ case BPF_FUNC_get_local_storage: ++ if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && ++ map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) ++ goto error; ++ break; ++ case BPF_FUNC_sk_select_reuseport: ++ if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) ++ goto error; ++ break; ++ case BPF_FUNC_map_peek_elem: ++ case BPF_FUNC_map_pop_elem: ++ case BPF_FUNC_map_push_elem: ++ if (map->map_type != BPF_MAP_TYPE_QUEUE && ++ map->map_type != BPF_MAP_TYPE_STACK) ++ goto error; ++ break; ++ case BPF_FUNC_sk_storage_get: ++ case BPF_FUNC_sk_storage_delete: ++ if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) ++ goto error; ++ break; + default: + break; + } + + return 0; + error: +- verbose("cannot pass map_type %d into func %d\n", +- map->map_type, func_id); ++ verbose(env, "cannot pass map_type %d into func %s#%d\n", ++ map->map_type, func_id_name(func_id), func_id); + return -EINVAL; + } + 
+-static int check_call(struct verifier_env *env, int func_id) ++static bool check_raw_mode_ok(const struct bpf_func_proto *fn) ++{ ++ int count = 0; ++ ++ if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) ++ count++; ++ if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) ++ count++; ++ if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) ++ count++; ++ if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) ++ count++; ++ if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) ++ count++; ++ ++ /* We only support one arg being in raw mode at the moment, ++ * which is sufficient for the helper functions we have ++ * right now. ++ */ ++ return count <= 1; ++} ++ ++static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, ++ enum bpf_arg_type arg_next) ++{ ++ return (arg_type_is_mem_ptr(arg_curr) && ++ !arg_type_is_mem_size(arg_next)) || ++ (!arg_type_is_mem_ptr(arg_curr) && ++ arg_type_is_mem_size(arg_next)); ++} ++ ++static bool check_arg_pair_ok(const struct bpf_func_proto *fn) ++{ ++ /* bpf_xxx(..., buf, len) call will access 'len' ++ * bytes from memory 'buf'. Both arg types need ++ * to be paired, so make sure there's no buggy ++ * helper function specification. ++ */ ++ if (arg_type_is_mem_size(fn->arg1_type) || ++ arg_type_is_mem_ptr(fn->arg5_type) || ++ check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || ++ check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || ++ check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || ++ check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) ++ return false; ++ ++ return true; ++} ++ ++static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) ++{ ++ int count = 0; ++ ++ if (arg_type_may_be_refcounted(fn->arg1_type)) ++ count++; ++ if (arg_type_may_be_refcounted(fn->arg2_type)) ++ count++; ++ if (arg_type_may_be_refcounted(fn->arg3_type)) ++ count++; ++ if (arg_type_may_be_refcounted(fn->arg4_type)) ++ count++; ++ if (arg_type_may_be_refcounted(fn->arg5_type)) ++ count++; ++ ++ /* A reference acquiring function cannot acquire ++ * another refcounted ptr. ++ */ ++ if (is_acquire_function(func_id) && count) ++ return false; ++ ++ /* We only support one arg being unreferenced at the moment, ++ * which is sufficient for the helper functions we have right now. ++ */ ++ return count <= 1; ++} ++ ++static int check_func_proto(const struct bpf_func_proto *fn, int func_id) ++{ ++ return check_raw_mode_ok(fn) && ++ check_arg_pair_ok(fn) && ++ check_refcount_ok(fn, func_id) ? 0 : -EINVAL; ++} ++ ++/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] ++ * are now invalid, so turn them into unknown SCALAR_VALUE. 
++ */ ++static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, ++ struct bpf_func_state *state) ++{ ++ struct bpf_reg_state *regs = state->regs, *reg; ++ int i; ++ ++ for (i = 0; i < MAX_BPF_REG; i++) ++ if (reg_is_pkt_pointer_any(®s[i])) ++ mark_reg_unknown(env, regs, i); ++ ++ bpf_for_each_spilled_reg(i, state, reg) { ++ if (!reg) ++ continue; ++ if (reg_is_pkt_pointer_any(reg)) ++ __mark_reg_unknown(env, reg); ++ } ++} ++ ++static void clear_all_pkt_pointers(struct bpf_verifier_env *env) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ int i; ++ ++ for (i = 0; i <= vstate->curframe; i++) ++ __clear_all_pkt_pointers(env, vstate->frame[i]); ++} ++ ++static void release_reg_references(struct bpf_verifier_env *env, ++ struct bpf_func_state *state, ++ int ref_obj_id) ++{ ++ struct bpf_reg_state *regs = state->regs, *reg; ++ int i; ++ ++ for (i = 0; i < MAX_BPF_REG; i++) ++ if (regs[i].ref_obj_id == ref_obj_id) ++ mark_reg_unknown(env, regs, i); ++ ++ bpf_for_each_spilled_reg(i, state, reg) { ++ if (!reg) ++ continue; ++ if (reg->ref_obj_id == ref_obj_id) ++ __mark_reg_unknown(env, reg); ++ } ++} ++ ++/* The pointer with the specified id has released its reference to kernel ++ * resources. Identify all copies of the same pointer and clear the reference. ++ */ ++static int release_reference(struct bpf_verifier_env *env, ++ int ref_obj_id) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ int err; ++ int i; ++ ++ err = release_reference_state(cur_func(env), ref_obj_id); ++ if (err) ++ return err; ++ ++ for (i = 0; i <= vstate->curframe; i++) ++ release_reg_references(env, vstate->frame[i], ref_obj_id); ++ ++ return 0; ++} ++ ++static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ++ int *insn_idx) ++{ ++ struct bpf_verifier_state *state = env->cur_state; ++ struct bpf_func_state *caller, *callee; ++ int i, err, subprog, target_insn; ++ ++ if (state->curframe + 1 >= MAX_CALL_FRAMES) { ++ verbose(env, "the call stack of %d frames is too deep\n", ++ state->curframe + 2); ++ return -E2BIG; ++ } ++ ++ target_insn = *insn_idx + insn->imm; ++ subprog = find_subprog(env, target_insn + 1); ++ if (subprog < 0) { ++ verbose(env, "verifier bug. No program starts at insn %d\n", ++ target_insn + 1); ++ return -EFAULT; ++ } ++ ++ caller = state->frame[state->curframe]; ++ if (state->frame[state->curframe + 1]) { ++ verbose(env, "verifier bug. Frame %d already allocated\n", ++ state->curframe + 1); ++ return -EFAULT; ++ } ++ ++ callee = kzalloc(sizeof(*callee), GFP_KERNEL); ++ if (!callee) ++ return -ENOMEM; ++ state->frame[state->curframe + 1] = callee; ++ ++ /* callee cannot access r0, r6 - r9 for reading and has to write ++ * into its own stack before reading from it. ++ * callee can read/write into caller's stack ++ */ ++ init_func_state(env, callee, ++ /* remember the callsite, it will be used by bpf_exit */ ++ *insn_idx /* callsite */, ++ state->curframe + 1 /* frameno within this callchain */, ++ subprog /* subprog number within this prog */); ++ ++ /* Transfer references to the callee */ ++ err = transfer_reference_state(callee, caller); ++ if (err) ++ return err; ++ ++ /* copy r1 - r5 args that callee can access. 
The copy includes parent ++ * pointers, which connects us up to the liveness chain ++ */ ++ for (i = BPF_REG_1; i <= BPF_REG_5; i++) ++ callee->regs[i] = caller->regs[i]; ++ ++ /* after the call registers r0 - r5 were scratched */ ++ for (i = 0; i < CALLER_SAVED_REGS; i++) { ++ mark_reg_not_init(env, caller->regs, caller_saved[i]); ++ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); ++ } ++ ++ /* only increment it after check_reg_arg() finished */ ++ state->curframe++; ++ ++ /* and go analyze first insn of the callee */ ++ *insn_idx = target_insn; ++ ++ if (env->log.level & BPF_LOG_LEVEL) { ++ verbose(env, "caller:\n"); ++ print_verifier_state(env, caller); ++ verbose(env, "callee:\n"); ++ print_verifier_state(env, callee); ++ } ++ return 0; ++} ++ ++static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) ++{ ++ struct bpf_verifier_state *state = env->cur_state; ++ struct bpf_func_state *caller, *callee; ++ struct bpf_reg_state *r0; ++ int err; ++ ++ callee = state->frame[state->curframe]; ++ r0 = &callee->regs[BPF_REG_0]; ++ if (r0->type == PTR_TO_STACK) { ++ /* technically it's ok to return caller's stack pointer ++ * (or caller's caller's pointer) back to the caller, ++ * since these pointers are valid. Only current stack ++ * pointer will be invalid as soon as function exits, ++ * but let's be conservative ++ */ ++ verbose(env, "cannot return stack pointer to the caller\n"); ++ return -EINVAL; ++ } ++ ++ state->curframe--; ++ caller = state->frame[state->curframe]; ++ /* return to the caller whatever r0 had in the callee */ ++ caller->regs[BPF_REG_0] = *r0; ++ ++ /* Transfer references to the caller */ ++ err = transfer_reference_state(caller, callee); ++ if (err) ++ return err; ++ ++ *insn_idx = callee->callsite + 1; ++ if (env->log.level & BPF_LOG_LEVEL) { ++ verbose(env, "returning from callee:\n"); ++ print_verifier_state(env, callee); ++ verbose(env, "to caller at %d:\n", *insn_idx); ++ print_verifier_state(env, caller); ++ } ++ /* clear everything in the callee */ ++ free_func_state(callee); ++ state->frame[state->curframe + 1] = NULL; ++ return 0; ++} ++ ++static int do_refine_retval_range(struct bpf_verifier_env *env, ++ struct bpf_reg_state *regs, int ret_type, ++ int func_id, struct bpf_call_arg_meta *meta) ++{ ++ struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; ++ struct bpf_reg_state tmp_reg = *ret_reg; ++ bool ret; ++ ++ if (ret_type != RET_INTEGER || ++ (func_id != BPF_FUNC_get_stack && ++ func_id != BPF_FUNC_probe_read_str)) ++ return 0; ++ ++ /* Error case where ret is in interval [S32MIN, -1]. */ ++ ret_reg->smin_value = S32_MIN; ++ ret_reg->smax_value = -1; ++ ++ __reg_deduce_bounds(ret_reg); ++ __reg_bound_offset(ret_reg); ++ __update_reg_bounds(ret_reg); ++ ++ ret = push_stack(env, env->insn_idx + 1, env->insn_idx, false); ++ if (!ret) ++ return -EFAULT; ++ ++ *ret_reg = tmp_reg; ++ ++ /* Success case where ret is in range [0, msize_max_value]. 
*/ ++ ret_reg->smin_value = 0; ++ ret_reg->smax_value = meta->msize_max_value; ++ ret_reg->umin_value = ret_reg->smin_value; ++ ret_reg->umax_value = ret_reg->smax_value; ++ ++ __reg_deduce_bounds(ret_reg); ++ __reg_bound_offset(ret_reg); ++ __update_reg_bounds(ret_reg); ++ ++ return 0; ++} ++ ++static int ++record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, ++ int func_id, int insn_idx) ++{ ++ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; ++ struct bpf_map *map = meta->map_ptr; ++ ++ if (func_id != BPF_FUNC_tail_call && ++ func_id != BPF_FUNC_map_lookup_elem && ++ func_id != BPF_FUNC_map_update_elem && ++ func_id != BPF_FUNC_map_delete_elem && ++ func_id != BPF_FUNC_map_push_elem && ++ func_id != BPF_FUNC_map_pop_elem && ++ func_id != BPF_FUNC_map_peek_elem) ++ return 0; ++ ++ if (map == NULL) { ++ verbose(env, "kernel subsystem misconfigured verifier\n"); ++ return -EINVAL; ++ } ++ ++ /* In case of read-only, some additional restrictions ++ * need to be applied in order to prevent altering the ++ * state of the map from program side. ++ */ ++ if ((map->map_flags & BPF_F_RDONLY_PROG) && ++ (func_id == BPF_FUNC_map_delete_elem || ++ func_id == BPF_FUNC_map_update_elem || ++ func_id == BPF_FUNC_map_push_elem || ++ func_id == BPF_FUNC_map_pop_elem)) { ++ verbose(env, "write into map forbidden\n"); ++ return -EACCES; ++ } ++ ++ if (!BPF_MAP_PTR(aux->map_state)) ++ bpf_map_ptr_store(aux, meta->map_ptr, ++ meta->map_ptr->unpriv_array); ++ else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr) ++ bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, ++ meta->map_ptr->unpriv_array); ++ return 0; ++} ++ ++static int check_reference_leak(struct bpf_verifier_env *env) ++{ ++ struct bpf_func_state *state = cur_func(env); ++ int i; ++ ++ for (i = 0; i < state->acquired_refs; i++) { ++ verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", ++ state->refs[i].id, state->refs[i].insn_idx); ++ } ++ return state->acquired_refs ? -EINVAL : 0; ++} ++ ++static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) + { +- struct verifier_state *state = &env->cur_state; + const struct bpf_func_proto *fn = NULL; +- struct reg_state *regs = state->regs; +- struct bpf_map *map = NULL; +- struct reg_state *reg; ++ struct bpf_reg_state *regs; ++ struct bpf_call_arg_meta meta; ++ bool changes_data; + int i, err; + + /* find function prototype */ + if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { +- verbose("invalid func %d\n", func_id); ++ verbose(env, "invalid func %s#%d\n", func_id_name(func_id), ++ func_id); + return -EINVAL; + } + +- if (env->prog->aux->ops->get_func_proto) +- fn = env->prog->aux->ops->get_func_proto(func_id); +- ++ if (env->ops->get_func_proto) ++ fn = env->ops->get_func_proto(func_id, env->prog); + if (!fn) { +- verbose("unknown func %d\n", func_id); ++ verbose(env, "unknown func %s#%d\n", func_id_name(func_id), ++ func_id); + return -EINVAL; + } + + /* eBPF programs must be GPL compatible to use GPL-ed functions */ + if (!env->prog->gpl_compatible && fn->gpl_only) { +- verbose("cannot call GPL only function from proprietary program\n"); ++ verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n"); ++ return -EINVAL; ++ } ++ ++ /* With LD_ABS/IND some JITs save/restore skb from r1. 
*/ ++ changes_data = bpf_helper_changes_pkt_data(fn->func); ++ if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { ++ verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", ++ func_id_name(func_id), func_id); + return -EINVAL; + } + ++ memset(&meta, 0, sizeof(meta)); ++ meta.pkt_access = fn->pkt_access; ++ ++ err = check_func_proto(fn, func_id); ++ if (err) { ++ verbose(env, "kernel subsystem misconfigured func %s#%d\n", ++ func_id_name(func_id), func_id); ++ return err; ++ } ++ ++ meta.func_id = func_id; + /* check args */ +- err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map); ++ err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); + if (err) + return err; +- err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map); ++ err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); + if (err) + return err; +- err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map); ++ err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); + if (err) + return err; +- err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map); ++ err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta); + if (err) + return err; +- err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map); ++ err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta); + if (err) + return err; + ++ err = record_func_map(env, &meta, func_id, insn_idx); ++ if (err) ++ return err; ++ ++ /* Mark slots with STACK_MISC in case of raw mode, stack offset ++ * is inferred from register state. ++ */ ++ for (i = 0; i < meta.access_size; i++) { ++ err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, ++ BPF_WRITE, -1, false); ++ if (err) ++ return err; ++ } ++ ++ if (func_id == BPF_FUNC_tail_call) { ++ err = check_reference_leak(env); ++ if (err) { ++ verbose(env, "tail_call would lead to reference leak\n"); ++ return err; ++ } ++ } else if (is_release_function(func_id)) { ++ err = release_reference(env, meta.ref_obj_id); ++ if (err) { ++ verbose(env, "func %s#%d reference has not been acquired before\n", ++ func_id_name(func_id), func_id); ++ return err; ++ } ++ } ++ ++ regs = cur_regs(env); ++ ++ /* check that flags argument in get_local_storage(map, flags) is 0, ++ * this is required because get_local_storage() can't return an error. ++ */ ++ if (func_id == BPF_FUNC_get_local_storage && ++ !register_is_null(®s[BPF_REG_2])) { ++ verbose(env, "get_local_storage() doesn't support non-zero flags\n"); ++ return -EINVAL; ++ } ++ + /* reset caller saved regs */ + for (i = 0; i < CALLER_SAVED_REGS; i++) { +- reg = regs + caller_saved[i]; +- reg->type = NOT_INIT; +- reg->imm = 0; ++ mark_reg_not_init(env, regs, caller_saved[i]); ++ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); + } + +- /* update return register */ ++ /* helper call returns 64-bit value. 
*/ ++ regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; ++ ++ /* update return register (already marked as written above) */ + if (fn->ret_type == RET_INTEGER) { +- regs[BPF_REG_0].type = UNKNOWN_VALUE; ++ /* sets type to SCALAR_VALUE */ ++ mark_reg_unknown(env, regs, BPF_REG_0); + } else if (fn->ret_type == RET_VOID) { + regs[BPF_REG_0].type = NOT_INIT; +- } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { +- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; ++ } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || ++ fn->ret_type == RET_PTR_TO_MAP_VALUE) { ++ /* There is no offset yet applied, variable or fixed */ ++ mark_reg_known_zero(env, regs, BPF_REG_0); + /* remember map_ptr, so that check_map_access() + * can check 'value_size' boundary of memory access + * to map element returned from bpf_map_lookup_elem() + */ +- if (map == NULL) { +- verbose("kernel subsystem misconfigured verifier\n"); ++ if (meta.map_ptr == NULL) { ++ verbose(env, ++ "kernel subsystem misconfigured verifier\n"); + return -EINVAL; + } +- regs[BPF_REG_0].map_ptr = map; ++ regs[BPF_REG_0].map_ptr = meta.map_ptr; ++ if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { ++ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; ++ if (map_value_has_spin_lock(meta.map_ptr)) ++ regs[BPF_REG_0].id = ++env->id_gen; ++ } else { ++ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; ++ regs[BPF_REG_0].id = ++env->id_gen; ++ } ++ } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { ++ mark_reg_known_zero(env, regs, BPF_REG_0); ++ regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; ++ regs[BPF_REG_0].id = ++env->id_gen; ++ } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { ++ mark_reg_known_zero(env, regs, BPF_REG_0); ++ regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; ++ regs[BPF_REG_0].id = ++env->id_gen; ++ } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { ++ mark_reg_known_zero(env, regs, BPF_REG_0); ++ regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; ++ regs[BPF_REG_0].id = ++env->id_gen; + } else { +- verbose("unknown return type %d of func %d\n", +- fn->ret_type, func_id); ++ verbose(env, "unknown return type %d of func %s#%d\n", ++ fn->ret_type, func_id_name(func_id), func_id); + return -EINVAL; + } + +- err = check_map_func_compatibility(map, func_id); ++ if (is_ptr_cast_function(func_id)) { ++ /* For release_reference() */ ++ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; ++ } else if (is_acquire_function(func_id)) { ++ int id = acquire_reference_state(env, insn_idx); ++ ++ if (id < 0) ++ return id; ++ /* For mark_ptr_or_null_reg() */ ++ regs[BPF_REG_0].id = id; ++ /* For release_reference() */ ++ regs[BPF_REG_0].ref_obj_id = id; ++ } ++ ++ err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); ++ if (err) ++ return err; ++ ++ err = check_map_func_compatibility(env, meta.map_ptr, func_id); + if (err) + return err; + ++ if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) { ++ const char *err_str; ++ ++#ifdef CONFIG_PERF_EVENTS ++ err = get_callchain_buffers(sysctl_perf_event_max_stack); ++ err_str = "cannot get callchain buffer for func %s#%d\n"; ++#else ++ err = -ENOTSUPP; ++ err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n"; ++#endif ++ if (err) { ++ verbose(env, err_str, func_id_name(func_id), func_id); ++ return err; ++ } ++ ++ env->prog->has_callchain_buf = true; ++ } ++ ++ if (changes_data) ++ clear_all_pkt_pointers(env); + return 0; + } + ++static bool signed_add_overflows(s64 a, s64 b) ++{ ++ /* Do the add in u64, where overflow is well-defined */ ++ s64 res = 
(s64)((u64)a + (u64)b); ++ ++ if (b < 0) ++ return res > a; ++ return res < a; ++} ++ ++static bool signed_sub_overflows(s64 a, s64 b) ++{ ++ /* Do the sub in u64, where overflow is well-defined */ ++ s64 res = (s64)((u64)a - (u64)b); ++ ++ if (b < 0) ++ return res < a; ++ return res > a; ++} ++ ++static bool check_reg_sane_offset(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg, ++ enum bpf_reg_type type) ++{ ++ bool known = tnum_is_const(reg->var_off); ++ s64 val = reg->var_off.value; ++ s64 smin = reg->smin_value; ++ ++ if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { ++ verbose(env, "math between %s pointer and %lld is not allowed\n", ++ reg_type_str[type], val); ++ return false; ++ } ++ ++ if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { ++ verbose(env, "%s pointer offset %d is not allowed\n", ++ reg_type_str[type], reg->off); ++ return false; ++ } ++ ++ if (smin == S64_MIN) { ++ verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", ++ reg_type_str[type]); ++ return false; ++ } ++ ++ if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { ++ verbose(env, "value %lld makes %s pointer be out of bounds\n", ++ smin, reg_type_str[type]); ++ return false; ++ } ++ ++ return true; ++} ++ ++static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) ++{ ++ return &env->insn_aux_data[env->insn_idx]; ++} ++ ++enum { ++ REASON_BOUNDS = -1, ++ REASON_TYPE = -2, ++ REASON_PATHS = -3, ++ REASON_LIMIT = -4, ++ REASON_STACK = -5, ++}; ++ ++static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, ++ u32 *alu_limit, bool mask_to_left) ++{ ++ u32 max = 0, ptr_limit = 0; ++ ++ switch (ptr_reg->type) { ++ case PTR_TO_STACK: ++ /* Offset 0 is out-of-bounds, but acceptable start for the ++ * left direction, see BPF_REG_FP. Also, unknown scalar ++ * offset where we would need to deal with min/max bounds is ++ * currently prohibited for unprivileged. ++ */ ++ max = MAX_BPF_STACK + mask_to_left; ++ ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off); ++ break; ++ case PTR_TO_MAP_VALUE: ++ max = ptr_reg->map_ptr->value_size; ++ ptr_limit = (mask_to_left ? ++ ptr_reg->smin_value : ++ ptr_reg->umax_value) + ptr_reg->off; ++ break; ++ default: ++ return REASON_TYPE; ++ } ++ ++ if (ptr_limit >= max) ++ return REASON_LIMIT; ++ *alu_limit = ptr_limit; ++ return 0; ++} ++ ++static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, ++ const struct bpf_insn *insn) ++{ ++ return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; ++} ++ ++static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, ++ u32 alu_state, u32 alu_limit) ++{ ++ /* If we arrived here from different branches with different ++ * state or limits to sanitize, then this won't work. ++ */ ++ if (aux->alu_state && ++ (aux->alu_state != alu_state || ++ aux->alu_limit != alu_limit)) ++ return REASON_PATHS; ++ ++ /* Corresponding fixup done in fixup_bpf_calls(). 
*/ ++ aux->alu_state = alu_state; ++ aux->alu_limit = alu_limit; ++ return 0; ++} ++ ++static int sanitize_val_alu(struct bpf_verifier_env *env, ++ struct bpf_insn *insn) ++{ ++ struct bpf_insn_aux_data *aux = cur_aux(env); ++ ++ if (can_skip_alu_sanitation(env, insn)) ++ return 0; ++ ++ return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); ++} ++ ++static bool sanitize_needed(u8 opcode) ++{ ++ return opcode == BPF_ADD || opcode == BPF_SUB; ++} ++ ++struct bpf_sanitize_info { ++ struct bpf_insn_aux_data aux; ++ bool mask_to_left; ++}; ++ ++static struct bpf_verifier_state * ++sanitize_speculative_path(struct bpf_verifier_env *env, ++ const struct bpf_insn *insn, ++ u32 next_idx, u32 curr_idx) ++{ ++ struct bpf_verifier_state *branch; ++ struct bpf_reg_state *regs; ++ ++ branch = push_stack(env, next_idx, curr_idx, true); ++ if (branch && insn) { ++ regs = branch->frame[branch->curframe]->regs; ++ if (BPF_SRC(insn->code) == BPF_K) { ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ } else if (BPF_SRC(insn->code) == BPF_X) { ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ mark_reg_unknown(env, regs, insn->src_reg); ++ } ++ } ++ return branch; ++} ++ ++static int sanitize_ptr_alu(struct bpf_verifier_env *env, ++ struct bpf_insn *insn, ++ const struct bpf_reg_state *ptr_reg, ++ const struct bpf_reg_state *off_reg, ++ struct bpf_reg_state *dst_reg, ++ struct bpf_sanitize_info *info, ++ const bool commit_window) ++{ ++ struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux; ++ struct bpf_verifier_state *vstate = env->cur_state; ++ bool off_is_imm = tnum_is_const(off_reg->var_off); ++ bool off_is_neg = off_reg->smin_value < 0; ++ bool ptr_is_dst_reg = ptr_reg == dst_reg; ++ u8 opcode = BPF_OP(insn->code); ++ u32 alu_state, alu_limit; ++ struct bpf_reg_state tmp; ++ bool ret; ++ int err; ++ ++ if (can_skip_alu_sanitation(env, insn)) ++ return 0; ++ ++ /* We already marked aux for masking from non-speculative ++ * paths, thus we got here in the first place. We only care ++ * to explore bad access from here. ++ */ ++ if (vstate->speculative) ++ goto do_sim; ++ ++ if (!commit_window) { ++ if (!tnum_is_const(off_reg->var_off) && ++ (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) ++ return REASON_BOUNDS; ++ ++ info->mask_to_left = (opcode == BPF_ADD && off_is_neg) || ++ (opcode == BPF_SUB && !off_is_neg); ++ } ++ ++ err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left); ++ if (err < 0) ++ return err; ++ ++ if (commit_window) { ++ /* In commit phase we narrow the masking window based on ++ * the observed pointer move after the simulated operation. ++ */ ++ alu_state = info->aux.alu_state; ++ alu_limit = abs(info->aux.alu_limit - alu_limit); ++ } else { ++ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; ++ alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; ++ alu_state |= ptr_is_dst_reg ? ++ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; ++ } ++ ++ err = update_alu_sanitation_state(aux, alu_state, alu_limit); ++ if (err < 0) ++ return err; ++do_sim: ++ /* If we're in commit phase, we're done here given we already ++ * pushed the truncated dst_reg into the speculative verification ++ * stack. ++ * ++ * Also, when register is a known constant, we rewrite register-based ++ * operation to immediate-based, and thus do not need masking (and as ++ * a consequence, do not need to simulate the zero-truncation either). 
++ */ ++ if (commit_window || off_is_imm) ++ return 0; ++ ++ /* Simulate and find potential out-of-bounds access under ++ * speculative execution from truncation as a result of ++ * masking when off was not within expected range. If off ++ * sits in dst, then we temporarily need to move ptr there ++ * to simulate dst (== 0) +/-= ptr. Needed, for example, ++ * for cases where we use K-based arithmetic in one direction ++ * and truncated reg-based in the other in order to explore ++ * bad access. ++ */ ++ if (!ptr_is_dst_reg) { ++ tmp = *dst_reg; ++ *dst_reg = *ptr_reg; ++ } ++ ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, ++ env->insn_idx); ++ if (!ptr_is_dst_reg && ret) ++ *dst_reg = tmp; ++ return !ret ? REASON_STACK : 0; ++} ++ ++static void sanitize_mark_insn_seen(struct bpf_verifier_env *env) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ ++ /* If we simulate paths under speculation, we don't update the ++ * insn as 'seen' such that when we verify unreachable paths in ++ * the non-speculative domain, sanitize_dead_code() can still ++ * rewrite/sanitize them. ++ */ ++ if (!vstate->speculative) ++ env->insn_aux_data[env->insn_idx].seen = true; ++} ++ ++static int sanitize_err(struct bpf_verifier_env *env, ++ const struct bpf_insn *insn, int reason, ++ const struct bpf_reg_state *off_reg, ++ const struct bpf_reg_state *dst_reg) ++{ ++ static const char *err = "pointer arithmetic with it prohibited for !root"; ++ const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub"; ++ u32 dst = insn->dst_reg, src = insn->src_reg; ++ ++ switch (reason) { ++ case REASON_BOUNDS: ++ verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n", ++ off_reg == dst_reg ? dst : src, err); ++ break; ++ case REASON_TYPE: ++ verbose(env, "R%d has pointer with unsupported alu operation, %s\n", ++ off_reg == dst_reg ? src : dst, err); ++ break; ++ case REASON_PATHS: ++ verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n", ++ dst, op, err); ++ break; ++ case REASON_LIMIT: ++ verbose(env, "R%d tried to %s beyond pointer bounds, %s\n", ++ dst, op, err); ++ break; ++ case REASON_STACK: ++ verbose(env, "R%d could not be pushed for speculative verification, %s\n", ++ dst, err); ++ break; ++ default: ++ verbose(env, "verifier internal error: unknown reason (%d)\n", ++ reason); ++ break; ++ } ++ ++ return -EACCES; ++} ++ ++static int sanitize_check_bounds(struct bpf_verifier_env *env, ++ const struct bpf_insn *insn, ++ const struct bpf_reg_state *dst_reg) ++{ ++ u32 dst = insn->dst_reg; ++ ++ /* For unprivileged we require that resulting offset must be in bounds ++ * in order to be able to sanitize access later on. ++ */ ++ if (env->allow_ptr_leaks) ++ return 0; ++ ++ switch (dst_reg->type) { ++ case PTR_TO_STACK: ++ if (check_stack_access(env, dst_reg, dst_reg->off + ++ dst_reg->var_off.value, 1)) { ++ verbose(env, "R%d stack pointer arithmetic goes out of range, " ++ "prohibited for !root\n", dst); ++ return -EACCES; ++ } ++ break; ++ case PTR_TO_MAP_VALUE: ++ if (check_map_access(env, dst, dst_reg->off, 1, false)) { ++ verbose(env, "R%d pointer arithmetic of map value goes out of range, " ++ "prohibited for !root\n", dst); ++ return -EACCES; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. ++ * Caller should also handle BPF_MOV case separately. ++ * If we return -EACCES, caller may want to try again treating pointer as a ++ * scalar. 
So we only emit a diagnostic if !env->allow_ptr_leaks. ++ */ ++static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, ++ struct bpf_insn *insn, ++ const struct bpf_reg_state *ptr_reg, ++ const struct bpf_reg_state *off_reg) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; ++ struct bpf_reg_state *regs = state->regs, *dst_reg; ++ bool known = tnum_is_const(off_reg->var_off); ++ s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, ++ smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; ++ u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, ++ umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; ++ struct bpf_sanitize_info info = {}; ++ u8 opcode = BPF_OP(insn->code); ++ u32 dst = insn->dst_reg; ++ int ret; ++ ++ dst_reg = ®s[dst]; ++ ++ if ((known && (smin_val != smax_val || umin_val != umax_val)) || ++ smin_val > smax_val || umin_val > umax_val) { ++ /* Taint dst register if offset had invalid bounds derived from ++ * e.g. dead branches. ++ */ ++ __mark_reg_unknown(env, dst_reg); ++ return 0; ++ } ++ ++ if (BPF_CLASS(insn->code) != BPF_ALU64) { ++ /* 32-bit ALU ops on pointers produce (meaningless) scalars */ ++ verbose(env, ++ "R%d 32-bit pointer arithmetic prohibited\n", ++ dst); ++ return -EACCES; ++ } ++ ++ switch (ptr_reg->type) { ++ case PTR_TO_MAP_VALUE_OR_NULL: ++ verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", ++ dst, reg_type_str[ptr_reg->type]); ++ return -EACCES; ++ case CONST_PTR_TO_MAP: ++ /* smin_val represents the known value */ ++ if (known && smin_val == 0 && opcode == BPF_ADD) ++ break; ++ /* fall-through */ ++ case PTR_TO_PACKET_END: ++ case PTR_TO_SOCKET: ++ case PTR_TO_SOCKET_OR_NULL: ++ case PTR_TO_SOCK_COMMON: ++ case PTR_TO_SOCK_COMMON_OR_NULL: ++ case PTR_TO_TCP_SOCK: ++ case PTR_TO_TCP_SOCK_OR_NULL: ++ case PTR_TO_XDP_SOCK: ++ verbose(env, "R%d pointer arithmetic on %s prohibited\n", ++ dst, reg_type_str[ptr_reg->type]); ++ return -EACCES; ++ default: ++ break; ++ } ++ ++ /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. ++ * The id may be overwritten later if we create a new variable offset. ++ */ ++ dst_reg->type = ptr_reg->type; ++ dst_reg->id = ptr_reg->id; ++ ++ if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || ++ !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) ++ return -EINVAL; ++ ++ if (sanitize_needed(opcode)) { ++ ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, ++ &info, false); ++ if (ret < 0) ++ return sanitize_err(env, insn, ret, off_reg, dst_reg); ++ } ++ ++ switch (opcode) { ++ case BPF_ADD: ++ /* We can take a fixed offset as long as it doesn't overflow ++ * the s32 'off' field ++ */ ++ if (known && (ptr_reg->off + smin_val == ++ (s64)(s32)(ptr_reg->off + smin_val))) { ++ /* pointer += K. Accumulate it into fixed offset */ ++ dst_reg->smin_value = smin_ptr; ++ dst_reg->smax_value = smax_ptr; ++ dst_reg->umin_value = umin_ptr; ++ dst_reg->umax_value = umax_ptr; ++ dst_reg->var_off = ptr_reg->var_off; ++ dst_reg->off = ptr_reg->off + smin_val; ++ dst_reg->raw = ptr_reg->raw; ++ break; ++ } ++ /* A new variable offset is created. Note that off_reg->off ++ * == 0, since it's a scalar. ++ * dst_reg gets the pointer type and since some positive ++ * integer value was added to the pointer, give it a new 'id' ++ * if it's a PTR_TO_PACKET. 
++ * this creates a new 'base' pointer, off_reg (variable) gets ++ * added into the variable offset, and we copy the fixed offset ++ * from ptr_reg. ++ */ ++ if (signed_add_overflows(smin_ptr, smin_val) || ++ signed_add_overflows(smax_ptr, smax_val)) { ++ dst_reg->smin_value = S64_MIN; ++ dst_reg->smax_value = S64_MAX; ++ } else { ++ dst_reg->smin_value = smin_ptr + smin_val; ++ dst_reg->smax_value = smax_ptr + smax_val; ++ } ++ if (umin_ptr + umin_val < umin_ptr || ++ umax_ptr + umax_val < umax_ptr) { ++ dst_reg->umin_value = 0; ++ dst_reg->umax_value = U64_MAX; ++ } else { ++ dst_reg->umin_value = umin_ptr + umin_val; ++ dst_reg->umax_value = umax_ptr + umax_val; ++ } ++ dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); ++ dst_reg->off = ptr_reg->off; ++ dst_reg->raw = ptr_reg->raw; ++ if (reg_is_pkt_pointer(ptr_reg)) { ++ dst_reg->id = ++env->id_gen; ++ /* something was added to pkt_ptr, set range to zero */ ++ dst_reg->raw = 0; ++ } ++ break; ++ case BPF_SUB: ++ if (dst_reg == off_reg) { ++ /* scalar -= pointer. Creates an unknown scalar */ ++ verbose(env, "R%d tried to subtract pointer from scalar\n", ++ dst); ++ return -EACCES; ++ } ++ /* We don't allow subtraction from FP, because (according to ++ * test_verifier.c test "invalid fp arithmetic", JITs might not ++ * be able to deal with it. ++ */ ++ if (ptr_reg->type == PTR_TO_STACK) { ++ verbose(env, "R%d subtraction from stack pointer prohibited\n", ++ dst); ++ return -EACCES; ++ } ++ if (known && (ptr_reg->off - smin_val == ++ (s64)(s32)(ptr_reg->off - smin_val))) { ++ /* pointer -= K. Subtract it from fixed offset */ ++ dst_reg->smin_value = smin_ptr; ++ dst_reg->smax_value = smax_ptr; ++ dst_reg->umin_value = umin_ptr; ++ dst_reg->umax_value = umax_ptr; ++ dst_reg->var_off = ptr_reg->var_off; ++ dst_reg->id = ptr_reg->id; ++ dst_reg->off = ptr_reg->off - smin_val; ++ dst_reg->raw = ptr_reg->raw; ++ break; ++ } ++ /* A new variable offset is created. If the subtrahend is known ++ * nonnegative, then any reg->range we had before is still good. ++ */ ++ if (signed_sub_overflows(smin_ptr, smax_val) || ++ signed_sub_overflows(smax_ptr, smin_val)) { ++ /* Overflow possible, we know nothing */ ++ dst_reg->smin_value = S64_MIN; ++ dst_reg->smax_value = S64_MAX; ++ } else { ++ dst_reg->smin_value = smin_ptr - smax_val; ++ dst_reg->smax_value = smax_ptr - smin_val; ++ } ++ if (umin_ptr < umax_val) { ++ /* Overflow possible, we know nothing */ ++ dst_reg->umin_value = 0; ++ dst_reg->umax_value = U64_MAX; ++ } else { ++ /* Cannot overflow (as long as bounds are consistent) */ ++ dst_reg->umin_value = umin_ptr - umax_val; ++ dst_reg->umax_value = umax_ptr - umin_val; ++ } ++ dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); ++ dst_reg->off = ptr_reg->off; ++ dst_reg->raw = ptr_reg->raw; ++ if (reg_is_pkt_pointer(ptr_reg)) { ++ dst_reg->id = ++env->id_gen; ++ /* something was added to pkt_ptr, set range to zero */ ++ if (smin_val < 0) ++ dst_reg->raw = 0; ++ } ++ break; ++ case BPF_AND: ++ case BPF_OR: ++ case BPF_XOR: ++ /* bitwise ops on pointers are troublesome, prohibit. */ ++ verbose(env, "R%d bitwise operator %s on pointer prohibited\n", ++ dst, bpf_alu_string[opcode >> 4]); ++ return -EACCES; ++ default: ++ /* other operators (e.g. 
MUL,LSH) produce non-pointer results */ ++ verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", ++ dst, bpf_alu_string[opcode >> 4]); ++ return -EACCES; ++ } ++ ++ if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) ++ return -EINVAL; ++ ++ __update_reg_bounds(dst_reg); ++ __reg_deduce_bounds(dst_reg); ++ __reg_bound_offset(dst_reg); ++ ++ if (sanitize_check_bounds(env, insn, dst_reg) < 0) ++ return -EACCES; ++ if (sanitize_needed(opcode)) { ++ ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, ++ &info, true); ++ if (ret < 0) ++ return sanitize_err(env, insn, ret, off_reg, dst_reg); ++ } ++ ++ return 0; ++} ++ ++/* WARNING: This function does calculations on 64-bit values, but the actual ++ * execution may occur on 32-bit values. Therefore, things like bitshifts ++ * need extra checks in the 32-bit case. ++ */ ++static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, ++ struct bpf_insn *insn, ++ struct bpf_reg_state *dst_reg, ++ struct bpf_reg_state src_reg) ++{ ++ struct bpf_reg_state *regs = cur_regs(env); ++ u8 opcode = BPF_OP(insn->code); ++ bool src_known, dst_known; ++ s64 smin_val, smax_val; ++ u64 umin_val, umax_val; ++ u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; ++ int ret; ++ ++ if (insn_bitness == 32) { ++ /* Relevant for 32-bit RSH: Information can propagate towards ++ * LSB, so it isn't sufficient to only truncate the output to ++ * 32 bits. ++ */ ++ coerce_reg_to_size(dst_reg, 4); ++ coerce_reg_to_size(&src_reg, 4); ++ } ++ ++ smin_val = src_reg.smin_value; ++ smax_val = src_reg.smax_value; ++ umin_val = src_reg.umin_value; ++ umax_val = src_reg.umax_value; ++ src_known = tnum_is_const(src_reg.var_off); ++ dst_known = tnum_is_const(dst_reg->var_off); ++ ++ if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || ++ smin_val > smax_val || umin_val > umax_val) { ++ /* Taint dst register if offset had invalid bounds derived from ++ * e.g. dead branches. 
++ */ ++ __mark_reg_unknown(env, dst_reg); ++ return 0; ++ } ++ ++ if (!src_known && ++ opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { ++ __mark_reg_unknown(env, dst_reg); ++ return 0; ++ } ++ ++ if (sanitize_needed(opcode)) { ++ ret = sanitize_val_alu(env, insn); ++ if (ret < 0) ++ return sanitize_err(env, insn, ret, NULL, NULL); ++ } ++ ++ switch (opcode) { ++ case BPF_ADD: ++ if (signed_add_overflows(dst_reg->smin_value, smin_val) || ++ signed_add_overflows(dst_reg->smax_value, smax_val)) { ++ dst_reg->smin_value = S64_MIN; ++ dst_reg->smax_value = S64_MAX; ++ } else { ++ dst_reg->smin_value += smin_val; ++ dst_reg->smax_value += smax_val; ++ } ++ if (dst_reg->umin_value + umin_val < umin_val || ++ dst_reg->umax_value + umax_val < umax_val) { ++ dst_reg->umin_value = 0; ++ dst_reg->umax_value = U64_MAX; ++ } else { ++ dst_reg->umin_value += umin_val; ++ dst_reg->umax_value += umax_val; ++ } ++ dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); ++ break; ++ case BPF_SUB: ++ if (signed_sub_overflows(dst_reg->smin_value, smax_val) || ++ signed_sub_overflows(dst_reg->smax_value, smin_val)) { ++ /* Overflow possible, we know nothing */ ++ dst_reg->smin_value = S64_MIN; ++ dst_reg->smax_value = S64_MAX; ++ } else { ++ dst_reg->smin_value -= smax_val; ++ dst_reg->smax_value -= smin_val; ++ } ++ if (dst_reg->umin_value < umax_val) { ++ /* Overflow possible, we know nothing */ ++ dst_reg->umin_value = 0; ++ dst_reg->umax_value = U64_MAX; ++ } else { ++ /* Cannot overflow (as long as bounds are consistent) */ ++ dst_reg->umin_value -= umax_val; ++ dst_reg->umax_value -= umin_val; ++ } ++ dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); ++ break; ++ case BPF_MUL: ++ dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); ++ if (smin_val < 0 || dst_reg->smin_value < 0) { ++ /* Ain't nobody got time to multiply that sign */ ++ __mark_reg_unbounded(dst_reg); ++ __update_reg_bounds(dst_reg); ++ break; ++ } ++ /* Both values are positive, so we can work with unsigned and ++ * copy the result to signed (unless it exceeds S64_MAX). ++ */ ++ if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) { ++ /* Potential overflow, we know nothing */ ++ __mark_reg_unbounded(dst_reg); ++ /* (except what we can learn from the var_off) */ ++ __update_reg_bounds(dst_reg); ++ break; ++ } ++ dst_reg->umin_value *= umin_val; ++ dst_reg->umax_value *= umax_val; ++ if (dst_reg->umax_value > S64_MAX) { ++ /* Overflow possible, we know nothing */ ++ dst_reg->smin_value = S64_MIN; ++ dst_reg->smax_value = S64_MAX; ++ } else { ++ dst_reg->smin_value = dst_reg->umin_value; ++ dst_reg->smax_value = dst_reg->umax_value; ++ } ++ break; ++ case BPF_AND: ++ if (src_known && dst_known) { ++ __mark_reg_known(dst_reg, dst_reg->var_off.value & ++ src_reg.var_off.value); ++ break; ++ } ++ /* We get our minimum from the var_off, since that's inherently ++ * bitwise. Our maximum is the minimum of the operands' maxima. ++ */ ++ dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off); ++ dst_reg->umin_value = dst_reg->var_off.value; ++ dst_reg->umax_value = min(dst_reg->umax_value, umax_val); ++ if (dst_reg->smin_value < 0 || smin_val < 0) { ++ /* Lose signed bounds when ANDing negative numbers, ++ * ain't nobody got time for that. ++ */ ++ dst_reg->smin_value = S64_MIN; ++ dst_reg->smax_value = S64_MAX; ++ } else { ++ /* ANDing two positives gives a positive, so safe to ++ * cast result into s64. 
++ */ ++ dst_reg->smin_value = dst_reg->umin_value; ++ dst_reg->smax_value = dst_reg->umax_value; ++ } ++ /* We may learn something more from the var_off */ ++ __update_reg_bounds(dst_reg); ++ break; ++ case BPF_OR: ++ if (src_known && dst_known) { ++ __mark_reg_known(dst_reg, dst_reg->var_off.value | ++ src_reg.var_off.value); ++ break; ++ } ++ /* We get our maximum from the var_off, and our minimum is the ++ * maximum of the operands' minima ++ */ ++ dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off); ++ dst_reg->umin_value = max(dst_reg->umin_value, umin_val); ++ dst_reg->umax_value = dst_reg->var_off.value | ++ dst_reg->var_off.mask; ++ if (dst_reg->smin_value < 0 || smin_val < 0) { ++ /* Lose signed bounds when ORing negative numbers, ++ * ain't nobody got time for that. ++ */ ++ dst_reg->smin_value = S64_MIN; ++ dst_reg->smax_value = S64_MAX; ++ } else { ++ /* ORing two positives gives a positive, so safe to ++ * cast result into s64. ++ */ ++ dst_reg->smin_value = dst_reg->umin_value; ++ dst_reg->smax_value = dst_reg->umax_value; ++ } ++ /* We may learn something more from the var_off */ ++ __update_reg_bounds(dst_reg); ++ break; ++ case BPF_LSH: ++ if (umax_val >= insn_bitness) { ++ /* Shifts greater than 31 or 63 are undefined. ++ * This includes shifts by a negative number. ++ */ ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ break; ++ } ++ /* We lose all sign bit information (except what we can pick ++ * up from var_off) ++ */ ++ dst_reg->smin_value = S64_MIN; ++ dst_reg->smax_value = S64_MAX; ++ /* If we might shift our top bit out, then we know nothing */ ++ if (dst_reg->umax_value > 1ULL << (63 - umax_val)) { ++ dst_reg->umin_value = 0; ++ dst_reg->umax_value = U64_MAX; ++ } else { ++ dst_reg->umin_value <<= umin_val; ++ dst_reg->umax_value <<= umax_val; ++ } ++ dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); ++ /* We may learn something more from the var_off */ ++ __update_reg_bounds(dst_reg); ++ break; ++ case BPF_RSH: ++ if (umax_val >= insn_bitness) { ++ /* Shifts greater than 31 or 63 are undefined. ++ * This includes shifts by a negative number. ++ */ ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ break; ++ } ++ /* BPF_RSH is an unsigned shift. If the value in dst_reg might ++ * be negative, then either: ++ * 1) src_reg might be zero, so the sign bit of the result is ++ * unknown, so we lose our signed bounds ++ * 2) it's known negative, thus the unsigned bounds capture the ++ * signed bounds ++ * 3) the signed bounds cross zero, so they tell us nothing ++ * about the result ++ * If the value in dst_reg is known nonnegative, then again the ++ * unsigned bounts capture the signed bounds. ++ * Thus, in all cases it suffices to blow away our signed bounds ++ * and rely on inferring new ones from the unsigned bounds and ++ * var_off of the result. ++ */ ++ dst_reg->smin_value = S64_MIN; ++ dst_reg->smax_value = S64_MAX; ++ dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); ++ dst_reg->umin_value >>= umax_val; ++ dst_reg->umax_value >>= umin_val; ++ /* We may learn something more from the var_off */ ++ __update_reg_bounds(dst_reg); ++ break; ++ case BPF_ARSH: ++ if (umax_val >= insn_bitness) { ++ /* Shifts greater than 31 or 63 are undefined. ++ * This includes shifts by a negative number. ++ */ ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ break; ++ } ++ ++ /* Upon reaching here, src_known is true and ++ * umax_val is equal to umin_val. 
++ */ ++ if (insn_bitness == 32) { ++ dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val); ++ dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val); ++ } else { ++ dst_reg->smin_value >>= umin_val; ++ dst_reg->smax_value >>= umin_val; ++ } ++ ++ dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, ++ insn_bitness); ++ ++ /* blow away the dst_reg umin_value/umax_value and rely on ++ * dst_reg var_off to refine the result. ++ */ ++ dst_reg->umin_value = 0; ++ dst_reg->umax_value = U64_MAX; ++ __update_reg_bounds(dst_reg); ++ break; ++ default: ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ break; ++ } ++ ++ if (BPF_CLASS(insn->code) != BPF_ALU64) { ++ /* 32-bit ALU ops are (32,32)->32 */ ++ coerce_reg_to_size(dst_reg, 4); ++ } ++ ++ __reg_deduce_bounds(dst_reg); ++ __reg_bound_offset(dst_reg); ++ return 0; ++} ++ ++/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max ++ * and var_off. ++ */ ++static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, ++ struct bpf_insn *insn) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; ++ struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; ++ struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; ++ u8 opcode = BPF_OP(insn->code); ++ int err; ++ ++ dst_reg = ®s[insn->dst_reg]; ++ src_reg = NULL; ++ if (dst_reg->type != SCALAR_VALUE) ++ ptr_reg = dst_reg; ++ if (BPF_SRC(insn->code) == BPF_X) { ++ src_reg = ®s[insn->src_reg]; ++ if (src_reg->type != SCALAR_VALUE) { ++ if (dst_reg->type != SCALAR_VALUE) { ++ /* Combining two pointers by any ALU op yields ++ * an arbitrary scalar. Disallow all math except ++ * pointer subtraction ++ */ ++ if (opcode == BPF_SUB && env->allow_ptr_leaks) { ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ return 0; ++ } ++ verbose(env, "R%d pointer %s pointer prohibited\n", ++ insn->dst_reg, ++ bpf_alu_string[opcode >> 4]); ++ return -EACCES; ++ } else { ++ /* scalar += pointer ++ * This is legal, but we have to reverse our ++ * src/dest handling in computing the range ++ */ ++ err = mark_chain_precision(env, insn->dst_reg); ++ if (err) ++ return err; ++ return adjust_ptr_min_max_vals(env, insn, ++ src_reg, dst_reg); ++ } ++ } else if (ptr_reg) { ++ /* pointer += scalar */ ++ err = mark_chain_precision(env, insn->src_reg); ++ if (err) ++ return err; ++ return adjust_ptr_min_max_vals(env, insn, ++ dst_reg, src_reg); ++ } ++ } else { ++ /* Pretend the src is a reg with a known value, since we only ++ * need to be able to read from this state. 
++ */ ++ off_reg.type = SCALAR_VALUE; ++ __mark_reg_known(&off_reg, insn->imm); ++ src_reg = &off_reg; ++ if (ptr_reg) /* pointer += K */ ++ return adjust_ptr_min_max_vals(env, insn, ++ ptr_reg, src_reg); ++ } ++ ++ /* Got here implies adding two SCALAR_VALUEs */ ++ if (WARN_ON_ONCE(ptr_reg)) { ++ print_verifier_state(env, state); ++ verbose(env, "verifier internal error: unexpected ptr_reg\n"); ++ return -EINVAL; ++ } ++ if (WARN_ON(!src_reg)) { ++ print_verifier_state(env, state); ++ verbose(env, "verifier internal error: no src_reg\n"); ++ return -EINVAL; ++ } ++ return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); ++} ++ + /* check validity of 32-bit and 64-bit arithmetic operations */ +-static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) ++static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) + { +- struct reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *regs = cur_regs(env); + u8 opcode = BPF_OP(insn->code); + int err; + +@@ -1034,30 +5134,31 @@ static int check_alu_op(struct verifier_ + if (BPF_SRC(insn->code) != 0 || + insn->src_reg != BPF_REG_0 || + insn->off != 0 || insn->imm != 0) { +- verbose("BPF_NEG uses reserved fields\n"); ++ verbose(env, "BPF_NEG uses reserved fields\n"); + return -EINVAL; + } + } else { + if (insn->src_reg != BPF_REG_0 || insn->off != 0 || +- (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) { +- verbose("BPF_END uses reserved fields\n"); ++ (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || ++ BPF_CLASS(insn->code) == BPF_ALU64) { ++ verbose(env, "BPF_END uses reserved fields\n"); + return -EINVAL; + } + } + + /* check src operand */ +- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); ++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); + if (err) + return err; + + if (is_pointer_value(env, insn->dst_reg)) { +- verbose("R%d pointer arithmetic prohibited\n", ++ verbose(env, "R%d pointer arithmetic prohibited\n", + insn->dst_reg); + return -EACCES; + } + + /* check dest operand */ +- err = check_reg_arg(regs, insn->dst_reg, DST_OP); ++ err = check_reg_arg(env, insn->dst_reg, DST_OP); + if (err) + return err; + +@@ -1065,81 +5166,100 @@ static int check_alu_op(struct verifier_ + + if (BPF_SRC(insn->code) == BPF_X) { + if (insn->imm != 0 || insn->off != 0) { +- verbose("BPF_MOV uses reserved fields\n"); ++ verbose(env, "BPF_MOV uses reserved fields\n"); + return -EINVAL; + } + + /* check src operand */ +- err = check_reg_arg(regs, insn->src_reg, SRC_OP); ++ err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + } else { + if (insn->src_reg != BPF_REG_0 || insn->off != 0) { +- verbose("BPF_MOV uses reserved fields\n"); ++ verbose(env, "BPF_MOV uses reserved fields\n"); + return -EINVAL; + } + } + +- /* check dest operand */ +- err = check_reg_arg(regs, insn->dst_reg, DST_OP); ++ /* check dest operand, mark as required later */ ++ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + if (err) + return err; + + if (BPF_SRC(insn->code) == BPF_X) { ++ struct bpf_reg_state *src_reg = regs + insn->src_reg; ++ struct bpf_reg_state *dst_reg = regs + insn->dst_reg; ++ + if (BPF_CLASS(insn->code) == BPF_ALU64) { + /* case: R1 = R2 + * copy register state to dest reg + */ +- regs[insn->dst_reg] = regs[insn->src_reg]; ++ *dst_reg = *src_reg; ++ dst_reg->live |= REG_LIVE_WRITTEN; ++ dst_reg->subreg_def = DEF_NOT_SUBREG; + } else { ++ /* R1 = (u32) R2 */ + if (is_pointer_value(env, insn->src_reg)) { +- verbose("R%d partial copy of pointer\n", ++ verbose(env, ++ 
"R%d partial copy of pointer\n", + insn->src_reg); + return -EACCES; ++ } else if (src_reg->type == SCALAR_VALUE) { ++ *dst_reg = *src_reg; ++ dst_reg->live |= REG_LIVE_WRITTEN; ++ dst_reg->subreg_def = env->insn_idx + 1; ++ } else { ++ mark_reg_unknown(env, regs, ++ insn->dst_reg); + } +- regs[insn->dst_reg].type = UNKNOWN_VALUE; +- regs[insn->dst_reg].map_ptr = NULL; ++ coerce_reg_to_size(dst_reg, 4); + } + } else { + /* case: R = imm + * remember the value we stored into this reg + */ +- regs[insn->dst_reg].type = CONST_IMM; +- regs[insn->dst_reg].imm = insn->imm; ++ /* clear any state __mark_reg_known doesn't set */ ++ mark_reg_unknown(env, regs, insn->dst_reg); ++ regs[insn->dst_reg].type = SCALAR_VALUE; ++ if (BPF_CLASS(insn->code) == BPF_ALU64) { ++ __mark_reg_known(regs + insn->dst_reg, ++ insn->imm); ++ } else { ++ __mark_reg_known(regs + insn->dst_reg, ++ (u32)insn->imm); ++ } + } + + } else if (opcode > BPF_END) { +- verbose("invalid BPF_ALU opcode %x\n", opcode); ++ verbose(env, "invalid BPF_ALU opcode %x\n", opcode); + return -EINVAL; + + } else { /* all other ALU ops: and, sub, xor, add, ... */ + +- bool stack_relative = false; +- + if (BPF_SRC(insn->code) == BPF_X) { + if (insn->imm != 0 || insn->off != 0) { +- verbose("BPF_ALU uses reserved fields\n"); ++ verbose(env, "BPF_ALU uses reserved fields\n"); + return -EINVAL; + } + /* check src1 operand */ +- err = check_reg_arg(regs, insn->src_reg, SRC_OP); ++ err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + } else { + if (insn->src_reg != BPF_REG_0 || insn->off != 0) { +- verbose("BPF_ALU uses reserved fields\n"); ++ verbose(env, "BPF_ALU uses reserved fields\n"); + return -EINVAL; + } + } + + /* check src2 operand */ +- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); ++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); + if (err) + return err; + + if ((opcode == BPF_MOD || opcode == BPF_DIV) && + BPF_SRC(insn->code) == BPF_K && insn->imm == 0) { +- verbose("div by zero\n"); ++ verbose(env, "div by zero\n"); + return -EINVAL; + } + +@@ -1148,185 +5268,980 @@ static int check_alu_op(struct verifier_ + int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 
64 : 32; + + if (insn->imm < 0 || insn->imm >= size) { +- verbose("invalid shift %d\n", insn->imm); ++ verbose(env, "invalid shift %d\n", insn->imm); + return -EINVAL; + } + } + +- /* pattern match 'bpf_add Rx, imm' instruction */ +- if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && +- regs[insn->dst_reg].type == FRAME_PTR && +- BPF_SRC(insn->code) == BPF_K) { +- stack_relative = true; +- } else if (is_pointer_value(env, insn->dst_reg)) { +- verbose("R%d pointer arithmetic prohibited\n", +- insn->dst_reg); +- return -EACCES; +- } else if (BPF_SRC(insn->code) == BPF_X && +- is_pointer_value(env, insn->src_reg)) { +- verbose("R%d pointer arithmetic prohibited\n", +- insn->src_reg); +- return -EACCES; +- } +- + /* check dest operand */ +- err = check_reg_arg(regs, insn->dst_reg, DST_OP); ++ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + if (err) + return err; + +- if (stack_relative) { +- regs[insn->dst_reg].type = PTR_TO_STACK; +- regs[insn->dst_reg].imm = insn->imm; +- } ++ return adjust_reg_min_max_vals(env, insn); + } + + return 0; + } + +-static int check_cond_jmp_op(struct verifier_env *env, ++static void __find_good_pkt_pointers(struct bpf_func_state *state, ++ struct bpf_reg_state *dst_reg, ++ enum bpf_reg_type type, u16 new_range) ++{ ++ struct bpf_reg_state *reg; ++ int i; ++ ++ for (i = 0; i < MAX_BPF_REG; i++) { ++ reg = &state->regs[i]; ++ if (reg->type == type && reg->id == dst_reg->id) ++ /* keep the maximum range already checked */ ++ reg->range = max(reg->range, new_range); ++ } ++ ++ bpf_for_each_spilled_reg(i, state, reg) { ++ if (!reg) ++ continue; ++ if (reg->type == type && reg->id == dst_reg->id) ++ reg->range = max(reg->range, new_range); ++ } ++} ++ ++static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, ++ struct bpf_reg_state *dst_reg, ++ enum bpf_reg_type type, ++ bool range_right_open) ++{ ++ u16 new_range; ++ int i; ++ ++ if (dst_reg->off < 0 || ++ (dst_reg->off == 0 && range_right_open)) ++ /* This doesn't give us any range */ ++ return; ++ ++ if (dst_reg->umax_value > MAX_PACKET_OFF || ++ dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF) ++ /* Risk of overflow. For instance, ptr + (1<<63) may be less ++ * than pkt_end, but that's because it's also less than pkt. ++ */ ++ return; ++ ++ new_range = dst_reg->off; ++ if (range_right_open) ++ new_range--; ++ ++ /* Examples for register markings: ++ * ++ * pkt_data in dst register: ++ * ++ * r2 = r3; ++ * r2 += 8; ++ * if (r2 > pkt_end) goto ++ * ++ * ++ * r2 = r3; ++ * r2 += 8; ++ * if (r2 < pkt_end) goto ++ * ++ * ++ * Where: ++ * r2 == dst_reg, pkt_end == src_reg ++ * r2=pkt(id=n,off=8,r=0) ++ * r3=pkt(id=n,off=0,r=0) ++ * ++ * pkt_data in src register: ++ * ++ * r2 = r3; ++ * r2 += 8; ++ * if (pkt_end >= r2) goto ++ * ++ * ++ * r2 = r3; ++ * r2 += 8; ++ * if (pkt_end <= r2) goto ++ * ++ * ++ * Where: ++ * pkt_end == dst_reg, r2 == src_reg ++ * r2=pkt(id=n,off=8,r=0) ++ * r3=pkt(id=n,off=0,r=0) ++ * ++ * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) ++ * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8) ++ * and [r3, r3 + 8-1) respectively is safe to access depending on ++ * the check. ++ */ ++ ++ /* If our ids match, then we must have the same max_value. And we ++ * don't care about the other reg's fixed offset, since if it's too big ++ * the range won't allow anything. ++ * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. 
++ */ ++ for (i = 0; i <= vstate->curframe; i++) ++ __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, ++ new_range); ++} ++ ++/* compute branch direction of the expression "if (reg opcode val) goto target;" ++ * and return: ++ * 1 - branch will be taken and "goto target" will be executed ++ * 0 - branch will not be taken and fall-through to next insn ++ * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10] ++ */ ++static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, ++ bool is_jmp32) ++{ ++ struct bpf_reg_state reg_lo; ++ s64 sval; ++ ++ if (__is_pointer_value(false, reg)) ++ return -1; ++ ++ if (is_jmp32) { ++ reg_lo = *reg; ++ reg = ®_lo; ++ /* For JMP32, only low 32 bits are compared, coerce_reg_to_size ++ * could truncate high bits and update umin/umax according to ++ * information of low bits. ++ */ ++ coerce_reg_to_size(reg, 4); ++ /* smin/smax need special handling. For example, after coerce, ++ * if smin_value is 0x00000000ffffffffLL, the value is -1 when ++ * used as operand to JMP32. It is a negative number from s32's ++ * point of view, while it is a positive number when seen as ++ * s64. The smin/smax are kept as s64, therefore, when used with ++ * JMP32, they need to be transformed into s32, then sign ++ * extended back to s64. ++ * ++ * Also, smin/smax were copied from umin/umax. If umin/umax has ++ * different sign bit, then min/max relationship doesn't ++ * maintain after casting into s32, for this case, set smin/smax ++ * to safest range. ++ */ ++ if ((reg->umax_value ^ reg->umin_value) & ++ (1ULL << 31)) { ++ reg->smin_value = S32_MIN; ++ reg->smax_value = S32_MAX; ++ } ++ reg->smin_value = (s64)(s32)reg->smin_value; ++ reg->smax_value = (s64)(s32)reg->smax_value; ++ ++ val = (u32)val; ++ sval = (s64)(s32)val; ++ } else { ++ sval = (s64)val; ++ } ++ ++ switch (opcode) { ++ case BPF_JEQ: ++ if (tnum_is_const(reg->var_off)) ++ return !!tnum_equals_const(reg->var_off, val); ++ break; ++ case BPF_JNE: ++ if (tnum_is_const(reg->var_off)) ++ return !tnum_equals_const(reg->var_off, val); ++ break; ++ case BPF_JSET: ++ if ((~reg->var_off.mask & reg->var_off.value) & val) ++ return 1; ++ if (!((reg->var_off.mask | reg->var_off.value) & val)) ++ return 0; ++ break; ++ case BPF_JGT: ++ if (reg->umin_value > val) ++ return 1; ++ else if (reg->umax_value <= val) ++ return 0; ++ break; ++ case BPF_JSGT: ++ if (reg->smin_value > sval) ++ return 1; ++ else if (reg->smax_value < sval) ++ return 0; ++ break; ++ case BPF_JLT: ++ if (reg->umax_value < val) ++ return 1; ++ else if (reg->umin_value >= val) ++ return 0; ++ break; ++ case BPF_JSLT: ++ if (reg->smax_value < sval) ++ return 1; ++ else if (reg->smin_value >= sval) ++ return 0; ++ break; ++ case BPF_JGE: ++ if (reg->umin_value >= val) ++ return 1; ++ else if (reg->umax_value < val) ++ return 0; ++ break; ++ case BPF_JSGE: ++ if (reg->smin_value >= sval) ++ return 1; ++ else if (reg->smax_value < sval) ++ return 0; ++ break; ++ case BPF_JLE: ++ if (reg->umax_value <= val) ++ return 1; ++ else if (reg->umin_value > val) ++ return 0; ++ break; ++ case BPF_JSLE: ++ if (reg->smax_value <= sval) ++ return 1; ++ else if (reg->smin_value > sval) ++ return 0; ++ break; ++ } ++ ++ return -1; ++} ++ ++/* Generate min value of the high 32-bit from TNUM info. */ ++static u64 gen_hi_min(struct tnum var) ++{ ++ return var.value & ~0xffffffffULL; ++} ++ ++/* Generate max value of the high 32-bit from TNUM info. 
*/ ++static u64 gen_hi_max(struct tnum var) ++{ ++ return (var.value | var.mask) & ~0xffffffffULL; ++} ++ ++/* Return true if VAL is compared with a s64 sign extended from s32, and they ++ * are with the same signedness. ++ */ ++static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg) ++{ ++ return ((s32)sval >= 0 && ++ reg->smin_value >= 0 && reg->smax_value <= S32_MAX) || ++ ((s32)sval < 0 && ++ reg->smax_value <= 0 && reg->smin_value >= S32_MIN); ++} ++ ++/* Constrain the possible values of @reg with unsigned upper bound @bound. ++ * If @is_exclusive, @bound is an exclusive limit, otherwise it is inclusive. ++ * If @is_jmp32, @bound is a 32-bit value that only constrains the low 32 bits ++ * of @reg. ++ */ ++static void set_upper_bound(struct bpf_reg_state *reg, u64 bound, bool is_jmp32, ++ bool is_exclusive) ++{ ++ if (is_exclusive) { ++ /* There are no values for `reg` that make `reg<0` true. */ ++ if (bound == 0) ++ return; ++ bound--; ++ } ++ if (is_jmp32) { ++ /* Constrain the register's value in the tnum representation. ++ * For 64-bit comparisons this happens later in ++ * __reg_bound_offset(), but for 32-bit comparisons, we can be ++ * more precise than what can be derived from the updated ++ * numeric bounds. ++ */ ++ struct tnum t = tnum_range(0, bound); ++ ++ t.mask |= ~0xffffffffULL; /* upper half is unknown */ ++ reg->var_off = tnum_intersect(reg->var_off, t); ++ ++ /* Compute the 64-bit bound from the 32-bit bound. */ ++ bound += gen_hi_max(reg->var_off); ++ } ++ reg->umax_value = min(reg->umax_value, bound); ++} ++ ++/* Constrain the possible values of @reg with unsigned lower bound @bound. ++ * If @is_exclusive, @bound is an exclusive limit, otherwise it is inclusive. ++ * If @is_jmp32, @bound is a 32-bit value that only constrains the low 32 bits ++ * of @reg. ++ */ ++static void set_lower_bound(struct bpf_reg_state *reg, u64 bound, bool is_jmp32, ++ bool is_exclusive) ++{ ++ if (is_exclusive) { ++ /* There are no values for `reg` that make `reg>MAX` true. */ ++ if (bound == (is_jmp32 ? U32_MAX : U64_MAX)) ++ return; ++ bound++; ++ } ++ if (is_jmp32) { ++ /* Constrain the register's value in the tnum representation. ++ * For 64-bit comparisons this happens later in ++ * __reg_bound_offset(), but for 32-bit comparisons, we can be ++ * more precise than what can be derived from the updated ++ * numeric bounds. ++ */ ++ struct tnum t = tnum_range(bound, U32_MAX); ++ ++ t.mask |= ~0xffffffffULL; /* upper half is unknown */ ++ reg->var_off = tnum_intersect(reg->var_off, t); ++ ++ /* Compute the 64-bit bound from the 32-bit bound. */ ++ bound += gen_hi_min(reg->var_off); ++ } ++ reg->umin_value = max(reg->umin_value, bound); ++} ++ ++/* Adjusts the register min/max values in the case that the dst_reg is the ++ * variable register that we are working on, and src_reg is a constant or we're ++ * simply doing a BPF_K check. ++ * In JEQ/JNE cases we also adjust the var_off values. ++ */ ++static void reg_set_min_max(struct bpf_reg_state *true_reg, ++ struct bpf_reg_state *false_reg, u64 val, ++ u8 opcode, bool is_jmp32) ++{ ++ s64 sval; ++ ++ /* If the dst_reg is a pointer, we can't learn anything about its ++ * variable offset from the compare (unless src_reg were a pointer into ++ * the same object, but we don't bother with that. ++ * Since false_reg and true_reg have the same type by construction, we ++ * only need to check one of them for pointerness. ++ */ ++ if (__is_pointer_value(false, false_reg)) ++ return; ++ ++ val = is_jmp32 ? 
(u32)val : val; ++ sval = is_jmp32 ? (s64)(s32)val : (s64)val; ++ ++ switch (opcode) { ++ case BPF_JEQ: ++ case BPF_JNE: ++ { ++ struct bpf_reg_state *reg = ++ opcode == BPF_JEQ ? true_reg : false_reg; ++ ++ /* For BPF_JEQ, if this is false we know nothing Jon Snow, but ++ * if it is true we know the value for sure. Likewise for ++ * BPF_JNE. ++ */ ++ if (is_jmp32) { ++ u64 old_v = reg->var_off.value; ++ u64 hi_mask = ~0xffffffffULL; ++ ++ reg->var_off.value = (old_v & hi_mask) | val; ++ reg->var_off.mask &= hi_mask; ++ } else { ++ __mark_reg_known(reg, val); ++ } ++ break; ++ } ++ case BPF_JSET: ++ false_reg->var_off = tnum_and(false_reg->var_off, ++ tnum_const(~val)); ++ if (is_power_of_2(val)) ++ true_reg->var_off = tnum_or(true_reg->var_off, ++ tnum_const(val)); ++ break; ++ case BPF_JGE: ++ case BPF_JGT: ++ { ++ set_upper_bound(false_reg, val, is_jmp32, opcode == BPF_JGE); ++ set_lower_bound(true_reg, val, is_jmp32, opcode == BPF_JGT); ++ break; ++ } ++ case BPF_JSGE: ++ case BPF_JSGT: ++ { ++ s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; ++ s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; ++ ++ /* If the full s64 was not sign-extended from s32 then don't ++ * deduct further info. ++ */ ++ if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) ++ break; ++ false_reg->smax_value = min(false_reg->smax_value, false_smax); ++ true_reg->smin_value = max(true_reg->smin_value, true_smin); ++ break; ++ } ++ case BPF_JLE: ++ case BPF_JLT: ++ { ++ set_lower_bound(false_reg, val, is_jmp32, opcode == BPF_JLE); ++ set_upper_bound(true_reg, val, is_jmp32, opcode == BPF_JLT); ++ break; ++ } ++ case BPF_JSLE: ++ case BPF_JSLT: ++ { ++ s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; ++ s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; ++ ++ if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) ++ break; ++ false_reg->smin_value = max(false_reg->smin_value, false_smin); ++ true_reg->smax_value = min(true_reg->smax_value, true_smax); ++ break; ++ } ++ default: ++ break; ++ } ++ ++ __reg_deduce_bounds(false_reg); ++ __reg_deduce_bounds(true_reg); ++ /* We might have learned some bits from the bounds. */ ++ __reg_bound_offset(false_reg); ++ __reg_bound_offset(true_reg); ++ /* Intersecting with the old var_off might have improved our bounds ++ * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), ++ * then new var_off is (0; 0x7f...fc) which improves our umax. ++ */ ++ __update_reg_bounds(false_reg); ++ __update_reg_bounds(true_reg); ++} ++ ++/* Same as above, but for the case that dst_reg holds a constant and src_reg is ++ * the variable reg. ++ */ ++static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, ++ struct bpf_reg_state *false_reg, u64 val, ++ u8 opcode, bool is_jmp32) ++{ ++ s64 sval; ++ ++ if (__is_pointer_value(false, false_reg)) ++ return; ++ ++ val = is_jmp32 ? (u32)val : val; ++ sval = is_jmp32 ? (s64)(s32)val : (s64)val; ++ ++ switch (opcode) { ++ case BPF_JEQ: ++ case BPF_JNE: ++ { ++ struct bpf_reg_state *reg = ++ opcode == BPF_JEQ ? 
true_reg : false_reg; ++ ++ if (is_jmp32) { ++ u64 old_v = reg->var_off.value; ++ u64 hi_mask = ~0xffffffffULL; ++ ++ reg->var_off.value = (old_v & hi_mask) | val; ++ reg->var_off.mask &= hi_mask; ++ } else { ++ __mark_reg_known(reg, val); ++ } ++ break; ++ } ++ case BPF_JSET: ++ false_reg->var_off = tnum_and(false_reg->var_off, ++ tnum_const(~val)); ++ if (is_power_of_2(val)) ++ true_reg->var_off = tnum_or(true_reg->var_off, ++ tnum_const(val)); ++ break; ++ case BPF_JGE: ++ case BPF_JGT: ++ { ++ set_lower_bound(false_reg, val, is_jmp32, opcode == BPF_JGE); ++ set_upper_bound(true_reg, val, is_jmp32, opcode == BPF_JGT); ++ break; ++ } ++ case BPF_JSGE: ++ case BPF_JSGT: ++ { ++ s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1; ++ s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval; ++ ++ if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) ++ break; ++ false_reg->smin_value = max(false_reg->smin_value, false_smin); ++ true_reg->smax_value = min(true_reg->smax_value, true_smax); ++ break; ++ } ++ case BPF_JLE: ++ case BPF_JLT: ++ { ++ set_upper_bound(false_reg, val, is_jmp32, opcode == BPF_JLE); ++ set_lower_bound(true_reg, val, is_jmp32, opcode == BPF_JLT); ++ break; ++ } ++ case BPF_JSLE: ++ case BPF_JSLT: ++ { ++ s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1; ++ s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval; ++ ++ if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) ++ break; ++ false_reg->smax_value = min(false_reg->smax_value, false_smax); ++ true_reg->smin_value = max(true_reg->smin_value, true_smin); ++ break; ++ } ++ default: ++ break; ++ } ++ ++ __reg_deduce_bounds(false_reg); ++ __reg_deduce_bounds(true_reg); ++ /* We might have learned some bits from the bounds. */ ++ __reg_bound_offset(false_reg); ++ __reg_bound_offset(true_reg); ++ /* Intersecting with the old var_off might have improved our bounds ++ * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), ++ * then new var_off is (0; 0x7f...fc) which improves our umax. ++ */ ++ __update_reg_bounds(false_reg); ++ __update_reg_bounds(true_reg); ++} ++ ++/* Regs are known to be equal, so intersect their min/max/var_off */ ++static void __reg_combine_min_max(struct bpf_reg_state *src_reg, ++ struct bpf_reg_state *dst_reg) ++{ ++ src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, ++ dst_reg->umin_value); ++ src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, ++ dst_reg->umax_value); ++ src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, ++ dst_reg->smin_value); ++ src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, ++ dst_reg->smax_value); ++ src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, ++ dst_reg->var_off); ++ /* We might have learned new bounds from the var_off. */ ++ __update_reg_bounds(src_reg); ++ __update_reg_bounds(dst_reg); ++ /* We might have learned something about the sign bit. */ ++ __reg_deduce_bounds(src_reg); ++ __reg_deduce_bounds(dst_reg); ++ /* We might have learned some bits from the bounds. */ ++ __reg_bound_offset(src_reg); ++ __reg_bound_offset(dst_reg); ++ /* Intersecting with the old var_off might have improved our bounds ++ * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), ++ * then new var_off is (0; 0x7f...fc) which improves our umax. 
++ */ ++ __update_reg_bounds(src_reg); ++ __update_reg_bounds(dst_reg); ++} ++ ++static void reg_combine_min_max(struct bpf_reg_state *true_src, ++ struct bpf_reg_state *true_dst, ++ struct bpf_reg_state *false_src, ++ struct bpf_reg_state *false_dst, ++ u8 opcode) ++{ ++ switch (opcode) { ++ case BPF_JEQ: ++ __reg_combine_min_max(true_src, true_dst); ++ break; ++ case BPF_JNE: ++ __reg_combine_min_max(false_src, false_dst); ++ break; ++ } ++} ++ ++static void mark_ptr_or_null_reg(struct bpf_func_state *state, ++ struct bpf_reg_state *reg, u32 id, ++ bool is_null) ++{ ++ if (reg_type_may_be_null(reg->type) && reg->id == id) { ++ /* Old offset (both fixed and variable parts) should ++ * have been known-zero, because we don't allow pointer ++ * arithmetic on pointers that might be NULL. ++ */ ++ if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || ++ !tnum_equals_const(reg->var_off, 0) || ++ reg->off)) { ++ __mark_reg_known_zero(reg); ++ reg->off = 0; ++ } ++ if (is_null) { ++ reg->type = SCALAR_VALUE; ++ } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) { ++ if (reg->map_ptr->inner_map_meta) { ++ reg->type = CONST_PTR_TO_MAP; ++ reg->map_ptr = reg->map_ptr->inner_map_meta; ++ } else if (reg->map_ptr->map_type == ++ BPF_MAP_TYPE_XSKMAP) { ++ reg->type = PTR_TO_XDP_SOCK; ++ } else { ++ reg->type = PTR_TO_MAP_VALUE; ++ } ++ } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { ++ reg->type = PTR_TO_SOCKET; ++ } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { ++ reg->type = PTR_TO_SOCK_COMMON; ++ } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { ++ reg->type = PTR_TO_TCP_SOCK; ++ } ++ if (is_null) { ++ /* We don't need id and ref_obj_id from this point ++ * onwards anymore, thus we should better reset it, ++ * so that state pruning has chances to take effect. ++ */ ++ reg->id = 0; ++ reg->ref_obj_id = 0; ++ } else if (!reg_may_point_to_spin_lock(reg)) { ++ /* For not-NULL ptr, reg->ref_obj_id will be reset ++ * in release_reg_references(). ++ * ++ * reg->id is still used by spin_lock ptr. Other ++ * than spin_lock ptr type, reg->id can be reset. ++ */ ++ reg->id = 0; ++ } ++ } ++} ++ ++static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, ++ bool is_null) ++{ ++ struct bpf_reg_state *reg; ++ int i; ++ ++ for (i = 0; i < MAX_BPF_REG; i++) ++ mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); ++ ++ bpf_for_each_spilled_reg(i, state, reg) { ++ if (!reg) ++ continue; ++ mark_ptr_or_null_reg(state, reg, id, is_null); ++ } ++} ++ ++/* The logic is similar to find_good_pkt_pointers(), both could eventually ++ * be folded together at some point. ++ */ ++static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, ++ bool is_null) ++{ ++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; ++ struct bpf_reg_state *regs = state->regs; ++ u32 ref_obj_id = regs[regno].ref_obj_id; ++ u32 id = regs[regno].id; ++ int i; ++ ++ if (ref_obj_id && ref_obj_id == id && is_null) ++ /* regs[regno] is in the " == NULL" branch. ++ * No one could have freed the reference state before ++ * doing the NULL check. 
++ */ ++ WARN_ON_ONCE(release_reference_state(state, id)); ++ ++ for (i = 0; i <= vstate->curframe; i++) ++ __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); ++} ++ ++static bool try_match_pkt_pointers(const struct bpf_insn *insn, ++ struct bpf_reg_state *dst_reg, ++ struct bpf_reg_state *src_reg, ++ struct bpf_verifier_state *this_branch, ++ struct bpf_verifier_state *other_branch) ++{ ++ if (BPF_SRC(insn->code) != BPF_X) ++ return false; ++ ++ /* Pointers are always 64-bit. */ ++ if (BPF_CLASS(insn->code) == BPF_JMP32) ++ return false; ++ ++ switch (BPF_OP(insn->code)) { ++ case BPF_JGT: ++ if ((dst_reg->type == PTR_TO_PACKET && ++ src_reg->type == PTR_TO_PACKET_END) || ++ (dst_reg->type == PTR_TO_PACKET_META && ++ reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { ++ /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ ++ find_good_pkt_pointers(this_branch, dst_reg, ++ dst_reg->type, false); ++ } else if ((dst_reg->type == PTR_TO_PACKET_END && ++ src_reg->type == PTR_TO_PACKET) || ++ (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && ++ src_reg->type == PTR_TO_PACKET_META)) { ++ /* pkt_end > pkt_data', pkt_data > pkt_meta' */ ++ find_good_pkt_pointers(other_branch, src_reg, ++ src_reg->type, true); ++ } else { ++ return false; ++ } ++ break; ++ case BPF_JLT: ++ if ((dst_reg->type == PTR_TO_PACKET && ++ src_reg->type == PTR_TO_PACKET_END) || ++ (dst_reg->type == PTR_TO_PACKET_META && ++ reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { ++ /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ ++ find_good_pkt_pointers(other_branch, dst_reg, ++ dst_reg->type, true); ++ } else if ((dst_reg->type == PTR_TO_PACKET_END && ++ src_reg->type == PTR_TO_PACKET) || ++ (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && ++ src_reg->type == PTR_TO_PACKET_META)) { ++ /* pkt_end < pkt_data', pkt_data > pkt_meta' */ ++ find_good_pkt_pointers(this_branch, src_reg, ++ src_reg->type, false); ++ } else { ++ return false; ++ } ++ break; ++ case BPF_JGE: ++ if ((dst_reg->type == PTR_TO_PACKET && ++ src_reg->type == PTR_TO_PACKET_END) || ++ (dst_reg->type == PTR_TO_PACKET_META && ++ reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { ++ /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ ++ find_good_pkt_pointers(this_branch, dst_reg, ++ dst_reg->type, true); ++ } else if ((dst_reg->type == PTR_TO_PACKET_END && ++ src_reg->type == PTR_TO_PACKET) || ++ (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && ++ src_reg->type == PTR_TO_PACKET_META)) { ++ /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ ++ find_good_pkt_pointers(other_branch, src_reg, ++ src_reg->type, false); ++ } else { ++ return false; ++ } ++ break; ++ case BPF_JLE: ++ if ((dst_reg->type == PTR_TO_PACKET && ++ src_reg->type == PTR_TO_PACKET_END) || ++ (dst_reg->type == PTR_TO_PACKET_META && ++ reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { ++ /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ ++ find_good_pkt_pointers(other_branch, dst_reg, ++ dst_reg->type, false); ++ } else if ((dst_reg->type == PTR_TO_PACKET_END && ++ src_reg->type == PTR_TO_PACKET) || ++ (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && ++ src_reg->type == PTR_TO_PACKET_META)) { ++ /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ ++ find_good_pkt_pointers(this_branch, src_reg, ++ src_reg->type, true); ++ } else { ++ return false; ++ } ++ break; ++ default: ++ return false; ++ } ++ ++ return true; ++} ++ ++static int check_cond_jmp_op(struct bpf_verifier_env *env, + struct bpf_insn *insn, int *insn_idx) + { +- struct reg_state *regs = env->cur_state.regs; +- struct 
verifier_state *other_branch; ++ struct bpf_verifier_state *this_branch = env->cur_state; ++ struct bpf_verifier_state *other_branch; ++ struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; ++ struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; + u8 opcode = BPF_OP(insn->code); ++ bool is_jmp32; ++ int pred = -1; + int err; + +- if (opcode > BPF_EXIT) { +- verbose("invalid BPF_JMP opcode %x\n", opcode); ++ /* Only conditional jumps are expected to reach here. */ ++ if (opcode == BPF_JA || opcode > BPF_JSLE) { ++ verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); + return -EINVAL; + } + + if (BPF_SRC(insn->code) == BPF_X) { + if (insn->imm != 0) { +- verbose("BPF_JMP uses reserved fields\n"); ++ verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); + return -EINVAL; + } + + /* check src1 operand */ +- err = check_reg_arg(regs, insn->src_reg, SRC_OP); ++ err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + + if (is_pointer_value(env, insn->src_reg)) { +- verbose("R%d pointer comparison prohibited\n", ++ verbose(env, "R%d pointer comparison prohibited\n", + insn->src_reg); + return -EACCES; + } ++ src_reg = ®s[insn->src_reg]; + } else { + if (insn->src_reg != BPF_REG_0) { +- verbose("BPF_JMP uses reserved fields\n"); ++ verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); + return -EINVAL; + } + } + + /* check src2 operand */ +- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); ++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); + if (err) + return err; + +- /* detect if R == 0 where R was initialized to zero earlier */ +- if (BPF_SRC(insn->code) == BPF_K && +- (opcode == BPF_JEQ || opcode == BPF_JNE) && +- regs[insn->dst_reg].type == CONST_IMM && +- regs[insn->dst_reg].imm == insn->imm) { +- if (opcode == BPF_JEQ) { +- /* if (imm == imm) goto pc+off; +- * only follow the goto, ignore fall-through +- */ +- *insn_idx += insn->off; +- return 0; +- } else { +- /* if (imm != imm) goto pc+off; +- * only follow fall-through branch, since +- * that's where the program will go +- */ +- return 0; +- } ++ dst_reg = ®s[insn->dst_reg]; ++ is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; ++ ++ if (BPF_SRC(insn->code) == BPF_K) ++ pred = is_branch_taken(dst_reg, insn->imm, ++ opcode, is_jmp32); ++ else if (src_reg->type == SCALAR_VALUE && ++ tnum_is_const(src_reg->var_off)) ++ pred = is_branch_taken(dst_reg, src_reg->var_off.value, ++ opcode, is_jmp32); ++ if (pred >= 0) { ++ err = mark_chain_precision(env, insn->dst_reg); ++ if (BPF_SRC(insn->code) == BPF_X && !err) ++ err = mark_chain_precision(env, insn->src_reg); ++ if (err) ++ return err; ++ } ++ ++ if (pred == 1) { ++ /* Only follow the goto, ignore fall-through. If needed, push ++ * the fall-through branch for simulation under speculative ++ * execution. ++ */ ++ if (!env->allow_ptr_leaks && ++ !sanitize_speculative_path(env, insn, *insn_idx + 1, ++ *insn_idx)) ++ return -EFAULT; ++ *insn_idx += insn->off; ++ return 0; ++ } else if (pred == 0) { ++ /* Only follow the fall-through branch, since that's where the ++ * program will go. If needed, push the goto branch for ++ * simulation under speculative execution. 
++ */ ++ if (!env->allow_ptr_leaks && ++ !sanitize_speculative_path(env, insn, ++ *insn_idx + insn->off + 1, ++ *insn_idx)) ++ return -EFAULT; ++ return 0; + } + +- other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); ++ other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, ++ false); + if (!other_branch) + return -EFAULT; ++ other_branch_regs = other_branch->frame[other_branch->curframe]->regs; + +- /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */ +- if (BPF_SRC(insn->code) == BPF_K && +- insn->imm == 0 && (opcode == BPF_JEQ || +- opcode == BPF_JNE) && +- regs[insn->dst_reg].type == PTR_TO_MAP_VALUE_OR_NULL) { +- if (opcode == BPF_JEQ) { +- /* next fallthrough insn can access memory via +- * this register +- */ +- regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; +- /* branch targer cannot access it, since reg == 0 */ +- other_branch->regs[insn->dst_reg].type = CONST_IMM; +- other_branch->regs[insn->dst_reg].imm = 0; +- } else { +- other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; +- regs[insn->dst_reg].type = CONST_IMM; +- regs[insn->dst_reg].imm = 0; +- } +- } else if (is_pointer_value(env, insn->dst_reg)) { +- verbose("R%d pointer comparison prohibited\n", insn->dst_reg); ++ /* detect if we are comparing against a constant value so we can adjust ++ * our min/max values for our dst register. ++ * this is only legit if both are scalars (or pointers to the same ++ * object, I suppose, but we don't support that right now), because ++ * otherwise the different base pointers mean the offsets aren't ++ * comparable. ++ */ ++ if (BPF_SRC(insn->code) == BPF_X) { ++ struct bpf_reg_state *src_reg = ®s[insn->src_reg]; ++ struct bpf_reg_state lo_reg0 = *dst_reg; ++ struct bpf_reg_state lo_reg1 = *src_reg; ++ struct bpf_reg_state *src_lo, *dst_lo; ++ ++ dst_lo = &lo_reg0; ++ src_lo = &lo_reg1; ++ coerce_reg_to_size(dst_lo, 4); ++ coerce_reg_to_size(src_lo, 4); ++ ++ if (dst_reg->type == SCALAR_VALUE && ++ src_reg->type == SCALAR_VALUE) { ++ if (tnum_is_const(src_reg->var_off) || ++ (is_jmp32 && tnum_is_const(src_lo->var_off))) ++ reg_set_min_max(&other_branch_regs[insn->dst_reg], ++ dst_reg, ++ is_jmp32 ++ ? src_lo->var_off.value ++ : src_reg->var_off.value, ++ opcode, is_jmp32); ++ else if (tnum_is_const(dst_reg->var_off) || ++ (is_jmp32 && tnum_is_const(dst_lo->var_off))) ++ reg_set_min_max_inv(&other_branch_regs[insn->src_reg], ++ src_reg, ++ is_jmp32 ++ ? dst_lo->var_off.value ++ : dst_reg->var_off.value, ++ opcode, is_jmp32); ++ else if (!is_jmp32 && ++ (opcode == BPF_JEQ || opcode == BPF_JNE)) ++ /* Comparing for equality, we can combine knowledge */ ++ reg_combine_min_max(&other_branch_regs[insn->src_reg], ++ &other_branch_regs[insn->dst_reg], ++ src_reg, dst_reg, opcode); ++ } ++ } else if (dst_reg->type == SCALAR_VALUE) { ++ reg_set_min_max(&other_branch_regs[insn->dst_reg], ++ dst_reg, insn->imm, opcode, is_jmp32); ++ } ++ ++ /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). ++ * NOTE: these optimizations below are related with pointer comparison ++ * which will never be JMP32. ++ */ ++ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && ++ insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && ++ reg_type_may_be_null(dst_reg->type)) { ++ /* Mark all identical registers in each branch as either ++ * safe or unknown depending R == 0 or R != 0 conditional. 
++ */ ++ mark_ptr_or_null_regs(this_branch, insn->dst_reg, ++ opcode == BPF_JNE); ++ mark_ptr_or_null_regs(other_branch, insn->dst_reg, ++ opcode == BPF_JEQ); ++ } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg], ++ this_branch, other_branch) && ++ is_pointer_value(env, insn->dst_reg)) { ++ verbose(env, "R%d pointer comparison prohibited\n", ++ insn->dst_reg); + return -EACCES; +- } else if (BPF_SRC(insn->code) == BPF_K && +- (opcode == BPF_JEQ || opcode == BPF_JNE)) { +- +- if (opcode == BPF_JEQ) { +- /* detect if (R == imm) goto +- * and in the target state recognize that R = imm +- */ +- other_branch->regs[insn->dst_reg].type = CONST_IMM; +- other_branch->regs[insn->dst_reg].imm = insn->imm; +- } else { +- /* detect if (R != imm) goto +- * and in the fall-through state recognize that R = imm +- */ +- regs[insn->dst_reg].type = CONST_IMM; +- regs[insn->dst_reg].imm = insn->imm; +- } + } +- if (log_level) +- print_verifier_state(env); ++ if (env->log.level & BPF_LOG_LEVEL) ++ print_verifier_state(env, this_branch->frame[this_branch->curframe]); + return 0; + } + +-/* return the map pointer stored inside BPF_LD_IMM64 instruction */ +-static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) +-{ +- u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32; +- +- return (struct bpf_map *) (unsigned long) imm64; +-} +- + /* verify BPF_LD_IMM64 instruction */ +-static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) ++static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) + { +- struct reg_state *regs = env->cur_state.regs; ++ struct bpf_insn_aux_data *aux = cur_aux(env); ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_map *map; + int err; + + if (BPF_SIZE(insn->code) != BPF_DW) { +- verbose("invalid BPF_LD_IMM insn\n"); ++ verbose(env, "invalid BPF_LD_IMM insn\n"); + return -EINVAL; + } + if (insn->off != 0) { +- verbose("BPF_LD_IMM64 uses reserved fields\n"); ++ verbose(env, "BPF_LD_IMM64 uses reserved fields\n"); + return -EINVAL; + } + +- err = check_reg_arg(regs, insn->dst_reg, DST_OP); ++ err = check_reg_arg(env, insn->dst_reg, DST_OP); + if (err) + return err; + +- if (insn->src_reg == 0) +- /* generic move 64-bit immediate into a register */ ++ if (insn->src_reg == 0) { ++ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; ++ ++ regs[insn->dst_reg].type = SCALAR_VALUE; ++ __mark_reg_known(®s[insn->dst_reg], imm); + return 0; ++ } + +- /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ +- BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); ++ map = env->used_maps[aux->map_index]; ++ mark_reg_known_zero(env, regs, insn->dst_reg); ++ regs[insn->dst_reg].map_ptr = map; ++ ++ if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { ++ regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; ++ regs[insn->dst_reg].off = aux->map_off; ++ if (map_value_has_spin_lock(map)) ++ regs[insn->dst_reg].id = ++env->id_gen; ++ } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { ++ regs[insn->dst_reg].type = CONST_PTR_TO_MAP; ++ } else { ++ verbose(env, "bpf verifier is misconfigured\n"); ++ return -EINVAL; ++ } + +- regs[insn->dst_reg].type = CONST_PTR_TO_MAP; +- regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn); + return 0; + } + +@@ -1357,53 +6272,148 @@ static bool may_access_skb(enum bpf_prog + * Output: + * R0 - 8/16/32-bit skb data converted to cpu endianness + */ +-static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) ++static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn 
*insn) + { +- struct reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *regs = cur_regs(env); ++ static const int ctx_reg = BPF_REG_6; + u8 mode = BPF_MODE(insn->code); +- struct reg_state *reg; + int i, err; + + if (!may_access_skb(env->prog->type)) { +- verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n"); ++ verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); ++ return -EINVAL; ++ } ++ ++ if (!env->ops->gen_ld_abs) { ++ verbose(env, "bpf verifier is misconfigured\n"); ++ return -EINVAL; ++ } ++ ++ if (env->subprog_cnt > 1) { ++ /* when program has LD_ABS insn JITs and interpreter assume ++ * that r1 == ctx == skb which is not the case for callees ++ * that can have arbitrary arguments. It's problematic ++ * for main prog as well since JITs would need to analyze ++ * all functions in order to make proper register save/restore ++ * decisions in the main prog. Hence disallow LD_ABS with calls ++ */ ++ verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n"); + return -EINVAL; + } + + if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || + BPF_SIZE(insn->code) == BPF_DW || + (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { +- verbose("BPF_LD_ABS uses reserved fields\n"); ++ verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n"); + return -EINVAL; + } + + /* check whether implicit source operand (register R6) is readable */ +- err = check_reg_arg(regs, BPF_REG_6, SRC_OP); ++ err = check_reg_arg(env, ctx_reg, SRC_OP); + if (err) + return err; + +- if (regs[BPF_REG_6].type != PTR_TO_CTX) { +- verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); ++ /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as ++ * gen_ld_abs() may terminate the program at runtime, leading to ++ * reference leak. ++ */ ++ err = check_reference_leak(env); ++ if (err) { ++ verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n"); ++ return err; ++ } ++ ++ if (env->cur_state->active_spin_lock) { ++ verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); ++ return -EINVAL; ++ } ++ ++ if (regs[ctx_reg].type != PTR_TO_CTX) { ++ verbose(env, ++ "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); + return -EINVAL; + } + + if (mode == BPF_IND) { + /* check explicit source operand */ +- err = check_reg_arg(regs, insn->src_reg, SRC_OP); ++ err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + } + ++ err = check_ctx_reg(env, ®s[ctx_reg], ctx_reg); ++ if (err < 0) ++ return err; ++ + /* reset caller saved regs to unreadable */ + for (i = 0; i < CALLER_SAVED_REGS; i++) { +- reg = regs + caller_saved[i]; +- reg->type = NOT_INIT; +- reg->imm = 0; ++ mark_reg_not_init(env, regs, caller_saved[i]); ++ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); + } + + /* mark destination R0 register as readable, since it contains +- * the value fetched from the packet ++ * the value fetched from the packet. ++ * Already marked as written above. + */ +- regs[BPF_REG_0].type = UNKNOWN_VALUE; ++ mark_reg_unknown(env, regs, BPF_REG_0); ++ /* ld_abs load up to 32-bit skb data. 
*/ ++ regs[BPF_REG_0].subreg_def = env->insn_idx + 1; ++ return 0; ++} ++ ++static int check_return_code(struct bpf_verifier_env *env) ++{ ++ struct tnum enforce_attach_type_range = tnum_unknown; ++ struct bpf_reg_state *reg; ++ struct tnum range = tnum_range(0, 1); ++ ++ switch (env->prog->type) { ++ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: ++ if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || ++ env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG) ++ range = tnum_range(1, 1); ++ break; ++ case BPF_PROG_TYPE_CGROUP_SKB: ++ if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { ++ range = tnum_range(0, 3); ++ enforce_attach_type_range = tnum_range(2, 3); ++ } ++ break; ++ case BPF_PROG_TYPE_CGROUP_SOCK: ++ case BPF_PROG_TYPE_SOCK_OPS: ++ case BPF_PROG_TYPE_CGROUP_DEVICE: ++ case BPF_PROG_TYPE_CGROUP_SYSCTL: ++ case BPF_PROG_TYPE_CGROUP_SOCKOPT: ++ break; ++ default: ++ return 0; ++ } ++ ++ reg = cur_regs(env) + BPF_REG_0; ++ if (reg->type != SCALAR_VALUE) { ++ verbose(env, "At program exit the register R0 is not a known value (%s)\n", ++ reg_type_str[reg->type]); ++ return -EINVAL; ++ } ++ ++ if (!tnum_in(range, reg->var_off)) { ++ char tn_buf[48]; ++ ++ verbose(env, "At program exit the register R0 "); ++ if (!tnum_is_unknown(reg->var_off)) { ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "has value %s", tn_buf); ++ } else { ++ verbose(env, "has unknown scalar value"); ++ } ++ tnum_strn(tn_buf, sizeof(tn_buf), range); ++ verbose(env, " should have been in %s\n", tn_buf); ++ return -EINVAL; ++ } ++ ++ if (!tnum_is_unknown(enforce_attach_type_range) && ++ tnum_in(enforce_attach_type_range, reg->var_off)) ++ env->prog->enforce_expected_attach_type = 1; + return 0; + } + +@@ -1447,19 +6457,37 @@ enum { + BRANCH = 2, + }; + +-#define STATE_LIST_MARK ((struct verifier_state_list *) -1L) ++static u32 state_htab_size(struct bpf_verifier_env *env) ++{ ++ return env->prog->len; ++} ++ ++static struct bpf_verifier_state_list **explored_state( ++ struct bpf_verifier_env *env, ++ int idx) ++{ ++ struct bpf_verifier_state *cur = env->cur_state; ++ struct bpf_func_state *state = cur->frame[cur->curframe]; ++ ++ return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; ++} + +-static int *insn_stack; /* stack of insns to process */ +-static int cur_stack; /* current stack index */ +-static int *insn_state; ++static void init_explored_state(struct bpf_verifier_env *env, int idx) ++{ ++ env->insn_aux_data[idx].prune_point = true; ++} + + /* t, w, e - match pseudo-code above: + * t - index of current instruction + * w - next instruction + * e - edge + */ +-static int push_insn(int t, int w, int e, struct verifier_env *env) ++static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, ++ bool loop_ok) + { ++ int *insn_stack = env->cfg.insn_stack; ++ int *insn_state = env->cfg.insn_state; ++ + if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) + return 0; + +@@ -1467,30 +6495,35 @@ static int push_insn(int t, int w, int e + return 0; + + if (w < 0 || w >= env->prog->len) { +- verbose("jump out of range from insn %d to %d\n", t, w); ++ verbose_linfo(env, t, "%d: ", t); ++ verbose(env, "jump out of range from insn %d to %d\n", t, w); + return -EINVAL; + } + + if (e == BRANCH) + /* mark branch target for state pruning */ +- env->explored_states[w] = STATE_LIST_MARK; ++ init_explored_state(env, w); + + if (insn_state[w] == 0) { + /* tree-edge */ + insn_state[t] = DISCOVERED | e; + insn_state[w] = DISCOVERED; +- if 
(cur_stack >= env->prog->len) ++ if (env->cfg.cur_stack >= env->prog->len) + return -E2BIG; +- insn_stack[cur_stack++] = w; ++ insn_stack[env->cfg.cur_stack++] = w; + return 1; + } else if ((insn_state[w] & 0xF0) == DISCOVERED) { +- verbose("back-edge from insn %d to %d\n", t, w); ++ if (loop_ok && env->allow_ptr_leaks) ++ return 0; ++ verbose_linfo(env, t, "%d: ", t); ++ verbose_linfo(env, w, "%d: ", w); ++ verbose(env, "back-edge from insn %d to %d\n", t, w); + return -EINVAL; + } else if (insn_state[w] == EXPLORED) { + /* forward- or cross-edge */ + insn_state[t] = DISCOVERED | e; + } else { +- verbose("insn state internal bug\n"); ++ verbose(env, "insn state internal bug\n"); + return -EFAULT; + } + return 0; +@@ -1499,43 +6532,56 @@ static int push_insn(int t, int w, int e + /* non-recursive depth-first-search to detect loops in BPF program + * loop == back-edge in directed graph + */ +-static int check_cfg(struct verifier_env *env) ++static int check_cfg(struct bpf_verifier_env *env) + { + struct bpf_insn *insns = env->prog->insnsi; + int insn_cnt = env->prog->len; ++ int *insn_stack, *insn_state; + int ret = 0; + int i, t; + +- insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); ++ insn_state = env->cfg.insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + if (!insn_state) + return -ENOMEM; + +- insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); ++ insn_stack = env->cfg.insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + if (!insn_stack) { +- kfree(insn_state); ++ kvfree(insn_state); + return -ENOMEM; + } + + insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ + insn_stack[0] = 0; /* 0 is the first instruction */ +- cur_stack = 1; ++ env->cfg.cur_stack = 1; + + peek_stack: +- if (cur_stack == 0) ++ if (env->cfg.cur_stack == 0) + goto check_state; +- t = insn_stack[cur_stack - 1]; ++ t = insn_stack[env->cfg.cur_stack - 1]; + +- if (BPF_CLASS(insns[t].code) == BPF_JMP) { ++ if (BPF_CLASS(insns[t].code) == BPF_JMP || ++ BPF_CLASS(insns[t].code) == BPF_JMP32) { + u8 opcode = BPF_OP(insns[t].code); + + if (opcode == BPF_EXIT) { + goto mark_explored; + } else if (opcode == BPF_CALL) { +- ret = push_insn(t, t + 1, FALLTHROUGH, env); ++ ret = push_insn(t, t + 1, FALLTHROUGH, env, false); + if (ret == 1) + goto peek_stack; + else if (ret < 0) + goto err_free; ++ if (t + 1 < insn_cnt) ++ init_explored_state(env, t + 1); ++ if (insns[t].src_reg == BPF_PSEUDO_CALL) { ++ init_explored_state(env, t); ++ ret = push_insn(t, t + insns[t].imm + 1, BRANCH, ++ env, false); ++ if (ret == 1) ++ goto peek_stack; ++ else if (ret < 0) ++ goto err_free; ++ } + } else if (opcode == BPF_JA) { + if (BPF_SRC(insns[t].code) != BPF_K) { + ret = -EINVAL; +@@ -1543,25 +6589,31 @@ peek_stack: + } + /* unconditional jump with single edge */ + ret = push_insn(t, t + insns[t].off + 1, +- FALLTHROUGH, env); ++ FALLTHROUGH, env, true); + if (ret == 1) + goto peek_stack; + else if (ret < 0) + goto err_free; ++ /* unconditional jmp is not a good pruning point, ++ * but it's marked, since backtracking needs ++ * to record jmp history in is_state_visited(). 
++ */ ++ init_explored_state(env, t + insns[t].off + 1); + /* tell verifier to check for equivalent states + * after every call and jump + */ + if (t + 1 < insn_cnt) +- env->explored_states[t + 1] = STATE_LIST_MARK; ++ init_explored_state(env, t + 1); + } else { + /* conditional jump with two edges */ +- ret = push_insn(t, t + 1, FALLTHROUGH, env); ++ init_explored_state(env, t); ++ ret = push_insn(t, t + 1, FALLTHROUGH, env, true); + if (ret == 1) + goto peek_stack; + else if (ret < 0) + goto err_free; + +- ret = push_insn(t, t + insns[t].off + 1, BRANCH, env); ++ ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true); + if (ret == 1) + goto peek_stack; + else if (ret < 0) +@@ -1571,7 +6623,7 @@ peek_stack: + /* all other non-branch instructions with single + * fall-through edge + */ +- ret = push_insn(t, t + 1, FALLTHROUGH, env); ++ ret = push_insn(t, t + 1, FALLTHROUGH, env, false); + if (ret == 1) + goto peek_stack; + else if (ret < 0) +@@ -1580,8 +6632,8 @@ peek_stack: + + mark_explored: + insn_state[t] = EXPLORED; +- if (cur_stack-- <= 0) { +- verbose("pop stack internal bug\n"); ++ if (env->cfg.cur_stack-- <= 0) { ++ verbose(env, "pop stack internal bug\n"); + ret = -EFAULT; + goto err_free; + } +@@ -1590,7 +6642,7 @@ mark_explored: + check_state: + for (i = 0; i < insn_cnt; i++) { + if (insn_state[i] != EXPLORED) { +- verbose("unreachable insn %d\n", i); ++ verbose(env, "unreachable insn %d\n", i); + ret = -EINVAL; + goto err_free; + } +@@ -1598,11 +6650,616 @@ check_state: + ret = 0; /* cfg looks good */ + + err_free: +- kfree(insn_state); +- kfree(insn_stack); ++ kvfree(insn_state); ++ kvfree(insn_stack); ++ env->cfg.insn_state = env->cfg.insn_stack = NULL; + return ret; + } + ++/* The minimum supported BTF func info size */ ++#define MIN_BPF_FUNCINFO_SIZE 8 ++#define MAX_FUNCINFO_REC_SIZE 252 ++ ++static int check_btf_func(struct bpf_verifier_env *env, ++ const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ u32 i, nfuncs, urec_size, min_size; ++ u32 krec_size = sizeof(struct bpf_func_info); ++ struct bpf_func_info *krecord; ++ const struct btf_type *type; ++ struct bpf_prog *prog; ++ const struct btf *btf; ++ void __user *urecord; ++ u32 prev_offset = 0; ++ int ret = 0; ++ ++ nfuncs = attr->func_info_cnt; ++ if (!nfuncs) ++ return 0; ++ ++ if (nfuncs != env->subprog_cnt) { ++ verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); ++ return -EINVAL; ++ } ++ ++ urec_size = attr->func_info_rec_size; ++ if (urec_size < MIN_BPF_FUNCINFO_SIZE || ++ urec_size > MAX_FUNCINFO_REC_SIZE || ++ urec_size % sizeof(u32)) { ++ verbose(env, "invalid func info rec size %u\n", urec_size); ++ return -EINVAL; ++ } ++ ++ prog = env->prog; ++ btf = prog->aux->btf; ++ ++ urecord = u64_to_user_ptr(attr->func_info); ++ min_size = min_t(u32, krec_size, urec_size); ++ ++ krecord = kcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); ++ if (!krecord) ++ return -ENOMEM; ++ ++ for (i = 0; i < nfuncs; i++) { ++ ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); ++ if (ret) { ++ if (ret == -E2BIG) { ++ verbose(env, "nonzero tailing record in func info"); ++ /* set the size kernel expects so loader can zero ++ * out the rest of the record. 
++ */ ++ if (put_user(min_size, &uattr->func_info_rec_size)) ++ ret = -EFAULT; ++ } ++ goto err_free; ++ } ++ ++ if (copy_from_user(&krecord[i], urecord, min_size)) { ++ ret = -EFAULT; ++ goto err_free; ++ } ++ ++ /* check insn_off */ ++ if (i == 0) { ++ if (krecord[i].insn_off) { ++ verbose(env, ++ "nonzero insn_off %u for the first func info record", ++ krecord[i].insn_off); ++ ret = -EINVAL; ++ goto err_free; ++ } ++ } else if (krecord[i].insn_off <= prev_offset) { ++ verbose(env, ++ "same or smaller insn offset (%u) than previous func info record (%u)", ++ krecord[i].insn_off, prev_offset); ++ ret = -EINVAL; ++ goto err_free; ++ } ++ ++ if (env->subprog_info[i].start != krecord[i].insn_off) { ++ verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); ++ ret = -EINVAL; ++ goto err_free; ++ } ++ ++ /* check type_id */ ++ type = btf_type_by_id(btf, krecord[i].type_id); ++ if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) { ++ verbose(env, "invalid type id %d in func info", ++ krecord[i].type_id); ++ ret = -EINVAL; ++ goto err_free; ++ } ++ ++ prev_offset = krecord[i].insn_off; ++ urecord += urec_size; ++ } ++ ++ prog->aux->func_info = krecord; ++ prog->aux->func_info_cnt = nfuncs; ++ return 0; ++ ++err_free: ++ kvfree(krecord); ++ return ret; ++} ++ ++static void adjust_btf_func(struct bpf_verifier_env *env) ++{ ++ int i; ++ ++ if (!env->prog->aux->func_info) ++ return; ++ ++ for (i = 0; i < env->subprog_cnt; i++) ++ env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start; ++} ++ ++#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \ ++ sizeof(((struct bpf_line_info *)(0))->line_col)) ++#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE ++ ++static int check_btf_line(struct bpf_verifier_env *env, ++ const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; ++ struct bpf_subprog_info *sub; ++ struct bpf_line_info *linfo; ++ struct bpf_prog *prog; ++ const struct btf *btf; ++ void __user *ulinfo; ++ int err; ++ ++ nr_linfo = attr->line_info_cnt; ++ if (!nr_linfo) ++ return 0; ++ ++ rec_size = attr->line_info_rec_size; ++ if (rec_size < MIN_BPF_LINEINFO_SIZE || ++ rec_size > MAX_LINEINFO_REC_SIZE || ++ rec_size & (sizeof(u32) - 1)) ++ return -EINVAL; ++ ++ /* Need to zero it in case the userspace may ++ * pass in a smaller bpf_line_info object. ++ */ ++ linfo = kcalloc(nr_linfo, sizeof(struct bpf_line_info), ++ GFP_KERNEL | __GFP_NOWARN); ++ if (!linfo) ++ return -ENOMEM; ++ ++ prog = env->prog; ++ btf = prog->aux->btf; ++ ++ s = 0; ++ sub = env->subprog_info; ++ ulinfo = u64_to_user_ptr(attr->line_info); ++ expected_size = sizeof(struct bpf_line_info); ++ ncopy = min_t(u32, expected_size, rec_size); ++ for (i = 0; i < nr_linfo; i++) { ++ err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size); ++ if (err) { ++ if (err == -E2BIG) { ++ verbose(env, "nonzero tailing record in line_info"); ++ if (put_user(expected_size, ++ &uattr->line_info_rec_size)) ++ err = -EFAULT; ++ } ++ goto err_free; ++ } ++ ++ if (copy_from_user(&linfo[i], ulinfo, ncopy)) { ++ err = -EFAULT; ++ goto err_free; ++ } ++ ++ /* ++ * Check insn_off to ensure ++ * 1) strictly increasing AND ++ * 2) bounded by prog->len ++ * ++ * The linfo[0].insn_off == 0 check logically falls into ++ * the later "missing bpf_line_info for func..." case ++ * because the first linfo[0].insn_off must be the ++ * first sub also and the first sub must have ++ * subprog_info[0].start == 0. 
++ */ ++ if ((i && linfo[i].insn_off <= prev_offset) || ++ linfo[i].insn_off >= prog->len) { ++ verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", ++ i, linfo[i].insn_off, prev_offset, ++ prog->len); ++ err = -EINVAL; ++ goto err_free; ++ } ++ ++ if (!prog->insnsi[linfo[i].insn_off].code) { ++ verbose(env, ++ "Invalid insn code at line_info[%u].insn_off\n", ++ i); ++ err = -EINVAL; ++ goto err_free; ++ } ++ ++ if (!btf_name_by_offset(btf, linfo[i].line_off) || ++ !btf_name_by_offset(btf, linfo[i].file_name_off)) { ++ verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i); ++ err = -EINVAL; ++ goto err_free; ++ } ++ ++ if (s != env->subprog_cnt) { ++ if (linfo[i].insn_off == sub[s].start) { ++ sub[s].linfo_idx = i; ++ s++; ++ } else if (sub[s].start < linfo[i].insn_off) { ++ verbose(env, "missing bpf_line_info for func#%u\n", s); ++ err = -EINVAL; ++ goto err_free; ++ } ++ } ++ ++ prev_offset = linfo[i].insn_off; ++ ulinfo += rec_size; ++ } ++ ++ if (s != env->subprog_cnt) { ++ verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", ++ env->subprog_cnt - s, s); ++ err = -EINVAL; ++ goto err_free; ++ } ++ ++ prog->aux->linfo = linfo; ++ prog->aux->nr_linfo = nr_linfo; ++ ++ return 0; ++ ++err_free: ++ kvfree(linfo); ++ return err; ++} ++ ++static int check_btf_info(struct bpf_verifier_env *env, ++ const union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ struct btf *btf; ++ int err; ++ ++ if (!attr->func_info_cnt && !attr->line_info_cnt) ++ return 0; ++ ++ btf = btf_get_by_fd(attr->prog_btf_fd); ++ if (IS_ERR(btf)) ++ return PTR_ERR(btf); ++ env->prog->aux->btf = btf; ++ ++ err = check_btf_func(env, attr, uattr); ++ if (err) ++ return err; ++ ++ err = check_btf_line(env, attr, uattr); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++/* check %cur's range satisfies %old's */ ++static bool range_within(struct bpf_reg_state *old, ++ struct bpf_reg_state *cur) ++{ ++ return old->umin_value <= cur->umin_value && ++ old->umax_value >= cur->umax_value && ++ old->smin_value <= cur->smin_value && ++ old->smax_value >= cur->smax_value; ++} ++ ++/* Maximum number of register states that can exist at once */ ++#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) ++struct idpair { ++ u32 old; ++ u32 cur; ++}; ++ ++/* If in the old state two registers had the same id, then they need to have ++ * the same id in the new state as well. But that id could be different from ++ * the old state, so we need to track the mapping from old to new ids. ++ * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent ++ * regs with old id 5 must also have new id 9 for the new state to be safe. But ++ * regs with a different old id could still have new id 9, we don't care about ++ * that. ++ * So we look through our idmap to see if this old id has been seen before. If ++ * so, we require the new id to match; otherwise, we add the id pair to the map. 
++ */ ++static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < ID_MAP_SIZE; i++) { ++ if (!idmap[i].old) { ++ /* Reached an empty slot; haven't seen this id before */ ++ idmap[i].old = old_id; ++ idmap[i].cur = cur_id; ++ return true; ++ } ++ if (idmap[i].old == old_id) ++ return idmap[i].cur == cur_id; ++ } ++ /* We ran out of idmap slots, which should be impossible */ ++ WARN_ON_ONCE(1); ++ return false; ++} ++ ++static void clean_func_state(struct bpf_verifier_env *env, ++ struct bpf_func_state *st) ++{ ++ enum bpf_reg_liveness live; ++ int i, j; ++ ++ for (i = 0; i < BPF_REG_FP; i++) { ++ live = st->regs[i].live; ++ /* liveness must not touch this register anymore */ ++ st->regs[i].live |= REG_LIVE_DONE; ++ if (!(live & REG_LIVE_READ)) ++ /* since the register is unused, clear its state ++ * to make further comparison simpler ++ */ ++ __mark_reg_not_init(env, &st->regs[i]); ++ } ++ ++ for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { ++ live = st->stack[i].spilled_ptr.live; ++ /* liveness must not touch this stack slot anymore */ ++ st->stack[i].spilled_ptr.live |= REG_LIVE_DONE; ++ if (!(live & REG_LIVE_READ)) { ++ __mark_reg_not_init(env, &st->stack[i].spilled_ptr); ++ for (j = 0; j < BPF_REG_SIZE; j++) ++ st->stack[i].slot_type[j] = STACK_INVALID; ++ } ++ } ++} ++ ++static void clean_verifier_state(struct bpf_verifier_env *env, ++ struct bpf_verifier_state *st) ++{ ++ int i; ++ ++ if (st->frame[0]->regs[0].live & REG_LIVE_DONE) ++ /* all regs in this state in all frames were already marked */ ++ return; ++ ++ for (i = 0; i <= st->curframe; i++) ++ clean_func_state(env, st->frame[i]); ++} ++ ++/* the parentage chains form a tree. ++ * the verifier states are added to state lists at given insn and ++ * pushed into state stack for future exploration. ++ * when the verifier reaches bpf_exit insn some of the verifer states ++ * stored in the state lists have their final liveness state already, ++ * but a lot of states will get revised from liveness point of view when ++ * the verifier explores other branches. ++ * Example: ++ * 1: r0 = 1 ++ * 2: if r1 == 100 goto pc+1 ++ * 3: r0 = 2 ++ * 4: exit ++ * when the verifier reaches exit insn the register r0 in the state list of ++ * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch ++ * of insn 2 and goes exploring further. At the insn 4 it will walk the ++ * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ. ++ * ++ * Since the verifier pushes the branch states as it sees them while exploring ++ * the program the condition of walking the branch instruction for the second ++ * time means that all states below this branch were already explored and ++ * their final liveness markes are already propagated. ++ * Hence when the verifier completes the search of state list in is_state_visited() ++ * we can call this clean_live_states() function to mark all liveness states ++ * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' ++ * will not be used. ++ * This function also clears the registers and stack for states that !READ ++ * to simplify state merging. ++ * ++ * Important note here that walking the same branch instruction in the callee ++ * doesn't meant that the states are DONE. 
The verifier has to compare ++ * the callsites ++ */ ++static void clean_live_states(struct bpf_verifier_env *env, int insn, ++ struct bpf_verifier_state *cur) ++{ ++ struct bpf_verifier_state_list *sl; ++ int i; ++ ++ sl = *explored_state(env, insn); ++ while (sl) { ++ if (sl->state.branches) ++ goto next; ++ if (sl->state.insn_idx != insn || ++ sl->state.curframe != cur->curframe) ++ goto next; ++ for (i = 0; i <= cur->curframe; i++) ++ if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) ++ goto next; ++ clean_verifier_state(env, &sl->state); ++next: ++ sl = sl->next; ++ } ++} ++ ++/* Returns true if (rold safe implies rcur safe) */ ++static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, ++ struct idpair *idmap) ++{ ++ bool equal; ++ ++ if (!(rold->live & REG_LIVE_READ)) ++ /* explored state didn't use this */ ++ return true; ++ ++ equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0; ++ ++ if (rold->type == PTR_TO_STACK) ++ /* two stack pointers are equal only if they're pointing to ++ * the same stack frame, since fp-8 in foo != fp-8 in bar ++ */ ++ return equal && rold->frameno == rcur->frameno; ++ ++ if (equal) ++ return true; ++ ++ if (rold->type == NOT_INIT) ++ /* explored state can't have used this */ ++ return true; ++ if (rcur->type == NOT_INIT) ++ return false; ++ switch (rold->type) { ++ case SCALAR_VALUE: ++ if (rcur->type == SCALAR_VALUE) { ++ if (!rold->precise && !rcur->precise) ++ return true; ++ /* new val must satisfy old val knowledge */ ++ return range_within(rold, rcur) && ++ tnum_in(rold->var_off, rcur->var_off); ++ } else { ++ /* We're trying to use a pointer in place of a scalar. ++ * Even if the scalar was unbounded, this could lead to ++ * pointer leaks because scalars are allowed to leak ++ * while pointers are not. We could make this safe in ++ * special cases if root is calling us, but it's ++ * probably not worth the hassle. ++ */ ++ return false; ++ } ++ case PTR_TO_MAP_VALUE: ++ /* If the new min/max/var_off satisfy the old ones and ++ * everything else matches, we are OK. ++ * 'id' is not compared, since it's only used for maps with ++ * bpf_spin_lock inside map element and in such cases if ++ * the rest of the prog is valid for one map element then ++ * it's valid for all map elements regardless of the key ++ * used in bpf_map_lookup() ++ */ ++ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && ++ range_within(rold, rcur) && ++ tnum_in(rold->var_off, rcur->var_off); ++ case PTR_TO_MAP_VALUE_OR_NULL: ++ /* a PTR_TO_MAP_VALUE could be safe to use as a ++ * PTR_TO_MAP_VALUE_OR_NULL into the same map. ++ * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- ++ * checked, doing so could have affected others with the same ++ * id, and we can't check for that because we lost the id when ++ * we converted to a PTR_TO_MAP_VALUE. ++ */ ++ if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) ++ return false; ++ if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) ++ return false; ++ /* Check our ids match any regs they're supposed to */ ++ return check_ids(rold->id, rcur->id, idmap); ++ case PTR_TO_PACKET_META: ++ case PTR_TO_PACKET: ++ if (rcur->type != rold->type) ++ return false; ++ /* We must have at least as much range as the old ptr ++ * did, so that any accesses which were safe before are ++ * still safe. This is true even if old range < old off, ++ * since someone could have accessed through (ptr - k), or ++ * even done ptr -= k in a register, to get a safe access. 
++ */ ++ if (rold->range > rcur->range) ++ return false; ++ /* If the offsets don't match, we can't trust our alignment; ++ * nor can we be sure that we won't fall out of range. ++ */ ++ if (rold->off != rcur->off) ++ return false; ++ /* id relations must be preserved */ ++ if (rold->id && !check_ids(rold->id, rcur->id, idmap)) ++ return false; ++ /* new val must satisfy old val knowledge */ ++ return range_within(rold, rcur) && ++ tnum_in(rold->var_off, rcur->var_off); ++ case PTR_TO_CTX: ++ case CONST_PTR_TO_MAP: ++ case PTR_TO_PACKET_END: ++ case PTR_TO_FLOW_KEYS: ++ case PTR_TO_SOCKET: ++ case PTR_TO_SOCKET_OR_NULL: ++ case PTR_TO_SOCK_COMMON: ++ case PTR_TO_SOCK_COMMON_OR_NULL: ++ case PTR_TO_TCP_SOCK: ++ case PTR_TO_TCP_SOCK_OR_NULL: ++ case PTR_TO_XDP_SOCK: ++ /* Only valid matches are exact, which memcmp() above ++ * would have accepted ++ */ ++ default: ++ /* Don't know what's going on, just say it's not safe */ ++ return false; ++ } ++ ++ /* Shouldn't get here; if we do, say it's not safe */ ++ WARN_ON_ONCE(1); ++ return false; ++} ++ ++static bool stacksafe(struct bpf_func_state *old, ++ struct bpf_func_state *cur, ++ struct idpair *idmap) ++{ ++ int i, spi; ++ ++ /* walk slots of the explored stack and ignore any additional ++ * slots in the current stack, since explored(safe) state ++ * didn't use them ++ */ ++ for (i = 0; i < old->allocated_stack; i++) { ++ spi = i / BPF_REG_SIZE; ++ ++ if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { ++ i += BPF_REG_SIZE - 1; ++ /* explored state didn't use this */ ++ continue; ++ } ++ ++ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) ++ continue; ++ ++ /* explored stack has more populated slots than current stack ++ * and these slots were used ++ */ ++ if (i >= cur->allocated_stack) ++ return false; ++ ++ /* if old state was safe with misc data in the stack ++ * it will be safe with zero-initialized stack. ++ * The opposite is not true ++ */ ++ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC && ++ cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) ++ continue; ++ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != ++ cur->stack[spi].slot_type[i % BPF_REG_SIZE]) ++ /* Ex: old explored (safe) state has STACK_SPILL in ++ * this stack slot, but current has has STACK_MISC -> ++ * this verifier states are not equivalent, ++ * return false to continue verification of this path ++ */ ++ return false; ++ if (i % BPF_REG_SIZE) ++ continue; ++ if (old->stack[spi].slot_type[0] != STACK_SPILL) ++ continue; ++ if (!regsafe(&old->stack[spi].spilled_ptr, ++ &cur->stack[spi].spilled_ptr, ++ idmap)) ++ /* when explored and current stack slot are both storing ++ * spilled registers, check that stored pointers types ++ * are the same as well. ++ * Ex: explored safe path could have stored ++ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} ++ * but current path has stored: ++ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} ++ * such verifier states are not equivalent. 
++ * return false to continue verification of this path ++ */ ++ return false; ++ } ++ return true; ++} ++ ++static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) ++{ ++ if (old->acquired_refs != cur->acquired_refs) ++ return false; ++ return !memcmp(old->refs, cur->refs, ++ sizeof(*old->refs) * old->acquired_refs); ++} ++ + /* compare two verifier states + * + * all states stored in state_list are known to be valid, since +@@ -1629,165 +7286,562 @@ err_free: + * whereas register type in current state is meaningful, it means that + * the current state will reach 'bpf_exit' instruction safely + */ +-static bool states_equal(struct verifier_state *old, struct verifier_state *cur) ++static bool func_states_equal(struct bpf_func_state *old, ++ struct bpf_func_state *cur) + { ++ struct idpair *idmap; ++ bool ret = false; + int i; + ++ idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); ++ /* If we failed to allocate the idmap, just say it's not safe */ ++ if (!idmap) ++ return false; ++ + for (i = 0; i < MAX_BPF_REG; i++) { +- if (memcmp(&old->regs[i], &cur->regs[i], +- sizeof(old->regs[0])) != 0) { +- if (old->regs[i].type == NOT_INIT || +- (old->regs[i].type == UNKNOWN_VALUE && +- cur->regs[i].type != NOT_INIT)) +- continue; ++ if (!regsafe(&old->regs[i], &cur->regs[i], idmap)) ++ goto out_free; ++ } ++ ++ if (!stacksafe(old, cur, idmap)) ++ goto out_free; ++ ++ if (!refsafe(old, cur)) ++ goto out_free; ++ ret = true; ++out_free: ++ kfree(idmap); ++ return ret; ++} ++ ++static bool states_equal(struct bpf_verifier_env *env, ++ struct bpf_verifier_state *old, ++ struct bpf_verifier_state *cur) ++{ ++ int i; ++ ++ if (old->curframe != cur->curframe) ++ return false; ++ ++ /* Verification state from speculative execution simulation ++ * must never prune a non-speculative execution one. ++ */ ++ if (old->speculative && !cur->speculative) ++ return false; ++ ++ if (old->active_spin_lock != cur->active_spin_lock) ++ return false; ++ ++ /* for states to be equal callsites have to be the same ++ * and all frame states need to be equivalent ++ */ ++ for (i = 0; i <= old->curframe; i++) { ++ if (old->frame[i]->callsite != cur->frame[i]->callsite) ++ return false; ++ if (!func_states_equal(old->frame[i], cur->frame[i])) + return false; ++ } ++ return true; ++} ++ ++/* Return 0 if no propagation happened. Return negative error code if error ++ * happened. Otherwise, return the propagated bit. ++ */ ++static int propagate_liveness_reg(struct bpf_verifier_env *env, ++ struct bpf_reg_state *reg, ++ struct bpf_reg_state *parent_reg) ++{ ++ u8 parent_flag = parent_reg->live & REG_LIVE_READ; ++ u8 flag = reg->live & REG_LIVE_READ; ++ int err; ++ ++ /* When comes here, read flags of PARENT_REG or REG could be any of ++ * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need ++ * of propagation if PARENT_REG has strongest REG_LIVE_READ64. ++ */ ++ if (parent_flag == REG_LIVE_READ64 || ++ /* Or if there is no read flag from REG. */ ++ !flag || ++ /* Or if the read flag from REG is the same as PARENT_REG. */ ++ parent_flag == flag) ++ return 0; ++ ++ err = mark_reg_read(env, reg, parent_reg, flag); ++ if (err) ++ return err; ++ ++ return flag; ++} ++ ++/* A write screens off any subsequent reads; but write marks come from the ++ * straight-line code between a state and its parent. 
When we arrive at an ++ * equivalent state (jump target or such) we didn't arrive by the straight-line ++ * code, so read marks in the state must propagate to the parent regardless ++ * of the state's write marks. That's what 'parent == state->parent' comparison ++ * in mark_reg_read() is for. ++ */ ++static int propagate_liveness(struct bpf_verifier_env *env, ++ const struct bpf_verifier_state *vstate, ++ struct bpf_verifier_state *vparent) ++{ ++ struct bpf_reg_state *state_reg, *parent_reg; ++ struct bpf_func_state *state, *parent; ++ int i, frame, err = 0; ++ ++ if (vparent->curframe != vstate->curframe) { ++ WARN(1, "propagate_live: parent frame %d current frame %d\n", ++ vparent->curframe, vstate->curframe); ++ return -EFAULT; ++ } ++ /* Propagate read liveness of registers... */ ++ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); ++ for (frame = 0; frame <= vstate->curframe; frame++) { ++ parent = vparent->frame[frame]; ++ state = vstate->frame[frame]; ++ parent_reg = parent->regs; ++ state_reg = state->regs; ++ /* We don't need to worry about FP liveness, it's read-only */ ++ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { ++ err = propagate_liveness_reg(env, &state_reg[i], ++ &parent_reg[i]); ++ if (err < 0) ++ return err; ++ if (err == REG_LIVE_READ64) ++ mark_insn_zext(env, &parent_reg[i]); ++ } ++ ++ /* Propagate stack slots. */ ++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && ++ i < parent->allocated_stack / BPF_REG_SIZE; i++) { ++ parent_reg = &parent->stack[i].spilled_ptr; ++ state_reg = &state->stack[i].spilled_ptr; ++ err = propagate_liveness_reg(env, state_reg, ++ parent_reg); ++ if (err < 0) ++ return err; + } + } ++ return 0; ++} + +- for (i = 0; i < MAX_BPF_STACK; i++) { +- if (old->stack_slot_type[i] == STACK_INVALID) ++/* find precise scalars in the previous equivalent state and ++ * propagate them into the current state ++ */ ++static int propagate_precision(struct bpf_verifier_env *env, ++ const struct bpf_verifier_state *old) ++{ ++ struct bpf_reg_state *state_reg; ++ struct bpf_func_state *state; ++ int i, err = 0; ++ ++ state = old->frame[old->curframe]; ++ state_reg = state->regs; ++ for (i = 0; i < BPF_REG_FP; i++, state_reg++) { ++ if (state_reg->type != SCALAR_VALUE || ++ !state_reg->precise) + continue; +- if (old->stack_slot_type[i] != cur->stack_slot_type[i]) +- /* Ex: old explored (safe) state has STACK_SPILL in +- * this stack slot, but current has has STACK_MISC -> +- * this verifier states are not equivalent, +- * return false to continue verification of this path +- */ +- return false; +- if (i % BPF_REG_SIZE) ++ if (env->log.level & BPF_LOG_LEVEL2) ++ verbose(env, "propagating r%d\n", i); ++ err = mark_chain_precision(env, i); ++ if (err < 0) ++ return err; ++ } ++ ++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { ++ if (state->stack[i].slot_type[0] != STACK_SPILL) + continue; +- if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE], +- &cur->spilled_regs[i / BPF_REG_SIZE], +- sizeof(old->spilled_regs[0]))) +- /* when explored and current stack slot types are +- * the same, check that stored pointers types +- * are the same as well. +- * Ex: explored safe path could have stored +- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} +- * but current path has stored: +- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} +- * such verifier states are not equivalent. 
+- * return false to continue verification of this path +- */ +- return false; +- else ++ state_reg = &state->stack[i].spilled_ptr; ++ if (state_reg->type != SCALAR_VALUE || ++ !state_reg->precise) + continue; ++ if (env->log.level & BPF_LOG_LEVEL2) ++ verbose(env, "propagating fp%d\n", ++ (-i - 1) * BPF_REG_SIZE); ++ err = mark_chain_precision_stack(env, i); ++ if (err < 0) ++ return err; + } ++ return 0; ++} ++ ++static bool states_maybe_looping(struct bpf_verifier_state *old, ++ struct bpf_verifier_state *cur) ++{ ++ struct bpf_func_state *fold, *fcur; ++ int i, fr = cur->curframe; ++ ++ if (old->curframe != fr) ++ return false; ++ ++ fold = old->frame[fr]; ++ fcur = cur->frame[fr]; ++ for (i = 0; i < MAX_BPF_REG; i++) ++ if (memcmp(&fold->regs[i], &fcur->regs[i], ++ offsetof(struct bpf_reg_state, parent))) ++ return false; + return true; + } + +-static int is_state_visited(struct verifier_env *env, int insn_idx) ++ ++static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) + { +- struct verifier_state_list *new_sl; +- struct verifier_state_list *sl; ++ struct bpf_verifier_state_list *new_sl; ++ struct bpf_verifier_state_list *sl, **pprev; ++ struct bpf_verifier_state *cur = env->cur_state, *new; ++ int i, j, err, states_cnt = 0; ++ bool add_new_state = env->test_state_freq ? true : false; + +- sl = env->explored_states[insn_idx]; +- if (!sl) ++ cur->last_insn_idx = env->prev_insn_idx; ++ if (!env->insn_aux_data[insn_idx].prune_point) + /* this 'insn_idx' instruction wasn't marked, so we will not + * be doing state search here + */ + return 0; + +- while (sl != STATE_LIST_MARK) { +- if (states_equal(&sl->state, &env->cur_state)) ++ /* bpf progs typically have pruning point every 4 instructions ++ * http://vger.kernel.org/bpfconf2019.html#session-1 ++ * Do not add new state for future pruning if the verifier hasn't seen ++ * at least 2 jumps and at least 8 instructions. ++ * This heuristics helps decrease 'total_states' and 'peak_states' metric. ++ * In tests that amounts to up to 50% reduction into total verifier ++ * memory consumption and 20% verifier time speedup. ++ */ ++ if (env->jmps_processed - env->prev_jmps_processed >= 2 && ++ env->insn_processed - env->prev_insn_processed >= 8) ++ add_new_state = true; ++ ++ pprev = explored_state(env, insn_idx); ++ sl = *pprev; ++ ++ clean_live_states(env, insn_idx, cur); ++ ++ while (sl) { ++ states_cnt++; ++ if (sl->state.insn_idx != insn_idx) ++ goto next; ++ if (sl->state.branches) { ++ if (states_maybe_looping(&sl->state, cur) && ++ states_equal(env, &sl->state, cur)) { ++ verbose_linfo(env, insn_idx, "; "); ++ verbose(env, "infinite loop detected at insn %d\n", insn_idx); ++ return -EINVAL; ++ } ++ /* if the verifier is processing a loop, avoid adding new state ++ * too often, since different loop iterations have distinct ++ * states and may not help future pruning. ++ * This threshold shouldn't be too low to make sure that ++ * a loop with large bound will be rejected quickly. ++ * The most abusive loop will be: ++ * r1 += 1 ++ * if r1 < 1000000 goto pc-2 ++ * 1M insn_procssed limit / 100 == 10k peak states. ++ * This threshold shouldn't be too high either, since states ++ * at the end of the loop are likely to be useful in pruning. 
++ */ ++ if (env->jmps_processed - env->prev_jmps_processed < 20 && ++ env->insn_processed - env->prev_insn_processed < 100) ++ add_new_state = false; ++ goto miss; ++ } ++ if (states_equal(env, &sl->state, cur)) { ++ sl->hit_cnt++; + /* reached equivalent register/stack state, +- * prune the search ++ * prune the search. ++ * Registers read by the continuation are read by us. ++ * If we have any write marks in env->cur_state, they ++ * will prevent corresponding reads in the continuation ++ * from reaching our parent (an explored_state). Our ++ * own state will get the read marks recorded, but ++ * they'll be immediately forgotten as we're pruning ++ * this state and will pop a new one. + */ +- return 1; +- sl = sl->next; +- } ++ err = propagate_liveness(env, &sl->state, cur); + +- /* there were no equivalent states, remember current one. +- * technically the current state is not proven to be safe yet, +- * but it will either reach bpf_exit (which means it's safe) or +- * it will be rejected. Since there are no loops, we won't be +- * seeing this 'insn_idx' instruction again on the way to bpf_exit ++ /* if previous state reached the exit with precision and ++ * current state is equivalent to it (except precsion marks) ++ * the precision needs to be propagated back in ++ * the current state. ++ */ ++ err = err ? : push_jmp_history(env, cur); ++ err = err ? : propagate_precision(env, &sl->state); ++ if (err) ++ return err; ++ return 1; ++ } ++miss: ++ /* when new state is not going to be added do not increase miss count. ++ * Otherwise several loop iterations will remove the state ++ * recorded earlier. The goal of these heuristics is to have ++ * states from some iterations of the loop (some in the beginning ++ * and some at the end) to help pruning. ++ */ ++ if (add_new_state) ++ sl->miss_cnt++; ++ /* heuristic to determine whether this state is beneficial ++ * to keep checking from state equivalence point of view. ++ * Higher numbers increase max_states_per_insn and verification time, ++ * but do not meaningfully decrease insn_processed. ++ */ ++ if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { ++ /* the state is unlikely to be useful. Remove it to ++ * speed up verification ++ */ ++ *pprev = sl->next; ++ if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { ++ u32 br = sl->state.branches; ++ ++ WARN_ONCE(br, ++ "BUG live_done but branches_to_explore %d\n", ++ br); ++ free_verifier_state(&sl->state, false); ++ kfree(sl); ++ env->peak_states--; ++ } else { ++ /* cannot free this state, since parentage chain may ++ * walk it later. Add it for free_list instead to ++ * be freed at the end of verification ++ */ ++ sl->next = env->free_list; ++ env->free_list = sl; ++ } ++ sl = *pprev; ++ continue; ++ } ++next: ++ pprev = &sl->next; ++ sl = *pprev; ++ } ++ ++ if (env->max_states_per_insn < states_cnt) ++ env->max_states_per_insn = states_cnt; ++ ++ if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) ++ return push_jmp_history(env, cur); ++ ++ if (!add_new_state) ++ return push_jmp_history(env, cur); ++ ++ /* There were no equivalent states, remember the current one. ++ * Technically the current state is not proven to be safe yet, ++ * but it will either reach outer most bpf_exit (which means it's safe) ++ * or it will be rejected. When there are no loops the verifier won't be ++ * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) ++ * again on the way to bpf_exit. 
++ * When looping the sl->state.branches will be > 0 and this state ++ * will not be considered for equivalence until branches == 0. + */ +- new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER); ++ new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); + if (!new_sl) + return -ENOMEM; ++ env->total_states++; ++ env->peak_states++; ++ env->prev_jmps_processed = env->jmps_processed; ++ env->prev_insn_processed = env->insn_processed; + + /* add new state to the head of linked list */ +- memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state)); +- new_sl->next = env->explored_states[insn_idx]; +- env->explored_states[insn_idx] = new_sl; ++ new = &new_sl->state; ++ err = copy_verifier_state(new, cur); ++ if (err) { ++ free_verifier_state(new, false); ++ kfree(new_sl); ++ return err; ++ } ++ new->insn_idx = insn_idx; ++ WARN_ONCE(new->branches != 1, ++ "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); ++ ++ cur->parent = new; ++ cur->first_insn_idx = insn_idx; ++ clear_jmp_history(cur); ++ new_sl->next = *explored_state(env, insn_idx); ++ *explored_state(env, insn_idx) = new_sl; ++ /* connect new state to parentage chain. Current frame needs all ++ * registers connected. Only r6 - r9 of the callers are alive (pushed ++ * to the stack implicitly by JITs) so in callers' frames connect just ++ * r6 - r9 as an optimization. Callers will have r1 - r5 connected to ++ * the state of the call instruction (with WRITTEN set), and r0 comes ++ * from callee with its full parentage chain, anyway. ++ */ ++ /* clear write marks in current state: the writes we did are not writes ++ * our child did, so they don't screen off its reads from us. ++ * (There are no read marks in current state, because reads always mark ++ * their parent and current state never has children yet. Only ++ * explored_states can get read marks.) ++ */ ++ for (j = 0; j <= cur->curframe; j++) { ++ for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) ++ cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; ++ for (i = 0; i < BPF_REG_FP; i++) ++ cur->frame[j]->regs[i].live = REG_LIVE_NONE; ++ } ++ ++ /* all stack frames are accessible from callee, clear them all */ ++ for (j = 0; j <= cur->curframe; j++) { ++ struct bpf_func_state *frame = cur->frame[j]; ++ struct bpf_func_state *newframe = new->frame[j]; ++ ++ for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) { ++ frame->stack[i].spilled_ptr.live = REG_LIVE_NONE; ++ frame->stack[i].spilled_ptr.parent = ++ &newframe->stack[i].spilled_ptr; ++ } ++ } + return 0; + } + +-static int do_check(struct verifier_env *env) ++/* Return true if it's OK to have the same insn return a different type. */ ++static bool reg_type_mismatch_ok(enum bpf_reg_type type) + { +- struct verifier_state *state = &env->cur_state; ++ switch (type) { ++ case PTR_TO_CTX: ++ case PTR_TO_SOCKET: ++ case PTR_TO_SOCKET_OR_NULL: ++ case PTR_TO_SOCK_COMMON: ++ case PTR_TO_SOCK_COMMON_OR_NULL: ++ case PTR_TO_TCP_SOCK: ++ case PTR_TO_TCP_SOCK_OR_NULL: ++ case PTR_TO_XDP_SOCK: ++ return false; ++ default: ++ return true; ++ } ++} ++ ++/* If an instruction was previously used with particular pointer types, then we ++ * need to be careful to avoid cases such as the below, where it may be ok ++ * for one branch accessing the pointer, but not ok for the other branch: ++ * ++ * R1 = sock_ptr ++ * goto X; ++ * ... ++ * R1 = some_other_valid_ptr; ++ * goto X; ++ * ... 
++ * R2 = *(u32 *)(R1 + 0); ++ */ ++static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) ++{ ++ return src != prev && (!reg_type_mismatch_ok(src) || ++ !reg_type_mismatch_ok(prev)); ++} ++ ++static int do_check(struct bpf_verifier_env *env) ++{ ++ struct bpf_verifier_state *state; + struct bpf_insn *insns = env->prog->insnsi; +- struct reg_state *regs = state->regs; ++ struct bpf_reg_state *regs; + int insn_cnt = env->prog->len; +- int insn_idx, prev_insn_idx = 0; +- int insn_processed = 0; + bool do_print_state = false; ++ int prev_insn_idx = -1; ++ ++ env->prev_linfo = NULL; ++ ++ state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); ++ if (!state) ++ return -ENOMEM; ++ state->curframe = 0; ++ state->speculative = false; ++ state->branches = 1; ++ state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); ++ if (!state->frame[0]) { ++ kfree(state); ++ return -ENOMEM; ++ } ++ env->cur_state = state; ++ init_func_state(env, state->frame[0], ++ BPF_MAIN_FUNC /* callsite */, ++ 0 /* frameno */, ++ 0 /* subprogno, zero == main subprog */); + +- init_reg_state(regs); +- insn_idx = 0; + for (;;) { + struct bpf_insn *insn; + u8 class; + int err; + +- if (insn_idx >= insn_cnt) { +- verbose("invalid insn idx %d insn_cnt %d\n", +- insn_idx, insn_cnt); ++ env->prev_insn_idx = prev_insn_idx; ++ if (env->insn_idx >= insn_cnt) { ++ verbose(env, "invalid insn idx %d insn_cnt %d\n", ++ env->insn_idx, insn_cnt); + return -EFAULT; + } + +- insn = &insns[insn_idx]; ++ insn = &insns[env->insn_idx]; + class = BPF_CLASS(insn->code); + +- if (++insn_processed > 32768) { +- verbose("BPF program is too large. Proccessed %d insn\n", +- insn_processed); ++ if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { ++ verbose(env, ++ "BPF program is too large. Processed %d insn\n", ++ env->insn_processed); + return -E2BIG; + } + +- err = is_state_visited(env, insn_idx); ++ err = is_state_visited(env, env->insn_idx); + if (err < 0) + return err; + if (err == 1) { + /* found equivalent state, can prune the search */ +- if (log_level) { ++ if (env->log.level & BPF_LOG_LEVEL) { + if (do_print_state) +- verbose("\nfrom %d to %d: safe\n", +- prev_insn_idx, insn_idx); ++ verbose(env, "\nfrom %d to %d%s: safe\n", ++ env->prev_insn_idx, env->insn_idx, ++ env->cur_state->speculative ? ++ " (speculative execution)" : ""); + else +- verbose("%d: safe\n", insn_idx); ++ verbose(env, "%d: safe\n", env->insn_idx); + } + goto process_bpf_exit; + } + +- if (log_level && do_print_state) { +- verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); +- print_verifier_state(env); ++ if (signal_pending(current)) ++ return -EAGAIN; ++ ++ if (need_resched()) ++ cond_resched(); ++ ++ if (env->log.level & BPF_LOG_LEVEL2 || ++ (env->log.level & BPF_LOG_LEVEL && do_print_state)) { ++ if (env->log.level & BPF_LOG_LEVEL2) ++ verbose(env, "%d:", env->insn_idx); ++ else ++ verbose(env, "\nfrom %d to %d%s:", ++ env->prev_insn_idx, env->insn_idx, ++ env->cur_state->speculative ? 
++ " (speculative execution)" : ""); ++ print_verifier_state(env, state->frame[state->curframe]); + do_print_state = false; + } + +- if (log_level) { +- verbose("%d: ", insn_idx); +- print_bpf_insn(env, insn); ++ if (env->log.level & BPF_LOG_LEVEL) { ++ const struct bpf_insn_cbs cbs = { ++ .cb_print = verbose, ++ .private_data = env, ++ }; ++ ++ verbose_linfo(env, env->insn_idx, "; "); ++ verbose(env, "%d: ", env->insn_idx); ++ print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); + } + ++ regs = cur_regs(env); ++ sanitize_mark_insn_seen(env); ++ prev_insn_idx = env->insn_idx; ++ + if (class == BPF_ALU || class == BPF_ALU64) { + err = check_alu_op(env, insn); + if (err) + return err; + + } else if (class == BPF_LDX) { +- enum bpf_reg_type src_reg_type; ++ enum bpf_reg_type *prev_src_type, src_reg_type; + + /* check for reserved fields is already done */ + + /* check src operand */ +- err = check_reg_arg(regs, insn->src_reg, SRC_OP); ++ err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + +- err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK); ++ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + if (err) + return err; + +@@ -1796,27 +7850,22 @@ static int do_check(struct verifier_env + /* check that memory (src_reg + off) is readable, + * the state of dst_reg will be updated by this func + */ +- err = check_mem_access(env, insn->src_reg, insn->off, +- BPF_SIZE(insn->code), BPF_READ, +- insn->dst_reg); ++ err = check_mem_access(env, env->insn_idx, insn->src_reg, ++ insn->off, BPF_SIZE(insn->code), ++ BPF_READ, insn->dst_reg, false); + if (err) + return err; + +- if (BPF_SIZE(insn->code) != BPF_W) { +- insn_idx++; +- continue; +- } ++ prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; + +- if (insn->imm == 0) { ++ if (*prev_src_type == NOT_INIT) { + /* saw a valid insn + * dst_reg = *(u32 *)(src_reg + off) +- * use reserved 'imm' field to mark this insn ++ * save type to validate intersecting paths + */ +- insn->imm = src_reg_type; ++ *prev_src_type = src_reg_type; + +- } else if (src_reg_type != insn->imm && +- (src_reg_type == PTR_TO_CTX || +- insn->imm == PTR_TO_CTX)) { ++ } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { + /* ABuser program is trying to use the same insn + * dst_reg = *(u32*) (src_reg + off) + * with different pointer types: +@@ -1824,79 +7873,98 @@ static int do_check(struct verifier_env + * src_reg == stack|map in some other branch. + * Reject it. 
+ */ +- verbose("same insn cannot be used with different pointers\n"); ++ verbose(env, "same insn cannot be used with different pointers\n"); + return -EINVAL; + } + + } else if (class == BPF_STX) { +- enum bpf_reg_type dst_reg_type; ++ enum bpf_reg_type *prev_dst_type, dst_reg_type; + + if (BPF_MODE(insn->code) == BPF_XADD) { +- err = check_xadd(env, insn); ++ err = check_xadd(env, env->insn_idx, insn); + if (err) + return err; +- insn_idx++; ++ env->insn_idx++; + continue; + } + + /* check src1 operand */ +- err = check_reg_arg(regs, insn->src_reg, SRC_OP); ++ err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + /* check src2 operand */ +- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); ++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); + if (err) + return err; + + dst_reg_type = regs[insn->dst_reg].type; + + /* check that memory (dst_reg + off) is writeable */ +- err = check_mem_access(env, insn->dst_reg, insn->off, +- BPF_SIZE(insn->code), BPF_WRITE, +- insn->src_reg); ++ err = check_mem_access(env, env->insn_idx, insn->dst_reg, ++ insn->off, BPF_SIZE(insn->code), ++ BPF_WRITE, insn->src_reg, false); + if (err) + return err; + +- if (insn->imm == 0) { +- insn->imm = dst_reg_type; +- } else if (dst_reg_type != insn->imm && +- (dst_reg_type == PTR_TO_CTX || +- insn->imm == PTR_TO_CTX)) { +- verbose("same insn cannot be used with different pointers\n"); ++ prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; ++ ++ if (*prev_dst_type == NOT_INIT) { ++ *prev_dst_type = dst_reg_type; ++ } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) { ++ verbose(env, "same insn cannot be used with different pointers\n"); + return -EINVAL; + } + + } else if (class == BPF_ST) { + if (BPF_MODE(insn->code) != BPF_MEM || + insn->src_reg != BPF_REG_0) { +- verbose("BPF_ST uses reserved fields\n"); ++ verbose(env, "BPF_ST uses reserved fields\n"); + return -EINVAL; + } + /* check src operand */ +- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); ++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); + if (err) + return err; + ++ if (is_ctx_reg(env, insn->dst_reg)) { ++ verbose(env, "BPF_ST stores into R%d %s is not allowed\n", ++ insn->dst_reg, ++ reg_type_str[reg_state(env, insn->dst_reg)->type]); ++ return -EACCES; ++ } ++ + /* check that memory (dst_reg + off) is writeable */ +- err = check_mem_access(env, insn->dst_reg, insn->off, +- BPF_SIZE(insn->code), BPF_WRITE, +- -1); ++ err = check_mem_access(env, env->insn_idx, insn->dst_reg, ++ insn->off, BPF_SIZE(insn->code), ++ BPF_WRITE, -1, false); + if (err) + return err; + +- } else if (class == BPF_JMP) { ++ } else if (class == BPF_JMP || class == BPF_JMP32) { + u8 opcode = BPF_OP(insn->code); + ++ env->jmps_processed++; + if (opcode == BPF_CALL) { + if (BPF_SRC(insn->code) != BPF_K || + insn->off != 0 || +- insn->src_reg != BPF_REG_0 || +- insn->dst_reg != BPF_REG_0) { +- verbose("BPF_CALL uses reserved fields\n"); ++ (insn->src_reg != BPF_REG_0 && ++ insn->src_reg != BPF_PSEUDO_CALL) || ++ insn->dst_reg != BPF_REG_0 || ++ class == BPF_JMP32) { ++ verbose(env, "BPF_CALL uses reserved fields\n"); + return -EINVAL; + } + +- err = check_call(env, insn->imm); ++ if (env->cur_state->active_spin_lock && ++ (insn->src_reg == BPF_PSEUDO_CALL || ++ insn->imm != BPF_FUNC_spin_unlock)) { ++ verbose(env, "function calls are not allowed while holding a lock\n"); ++ return -EINVAL; ++ } ++ if (insn->src_reg == BPF_PSEUDO_CALL) ++ err = check_func_call(env, insn, &env->insn_idx); ++ else ++ err = check_helper_call(env, insn->imm, 
env->insn_idx); + if (err) + return err; + +@@ -1904,48 +7972,75 @@ static int do_check(struct verifier_env + if (BPF_SRC(insn->code) != BPF_K || + insn->imm != 0 || + insn->src_reg != BPF_REG_0 || +- insn->dst_reg != BPF_REG_0) { +- verbose("BPF_JA uses reserved fields\n"); ++ insn->dst_reg != BPF_REG_0 || ++ class == BPF_JMP32) { ++ verbose(env, "BPF_JA uses reserved fields\n"); + return -EINVAL; + } + +- insn_idx += insn->off + 1; ++ env->insn_idx += insn->off + 1; + continue; + + } else if (opcode == BPF_EXIT) { + if (BPF_SRC(insn->code) != BPF_K || + insn->imm != 0 || + insn->src_reg != BPF_REG_0 || +- insn->dst_reg != BPF_REG_0) { +- verbose("BPF_EXIT uses reserved fields\n"); ++ insn->dst_reg != BPF_REG_0 || ++ class == BPF_JMP32) { ++ verbose(env, "BPF_EXIT uses reserved fields\n"); + return -EINVAL; + } + ++ if (env->cur_state->active_spin_lock) { ++ verbose(env, "bpf_spin_unlock is missing\n"); ++ return -EINVAL; ++ } ++ ++ if (state->curframe) { ++ /* exit from nested function */ ++ err = prepare_func_exit(env, &env->insn_idx); ++ if (err) ++ return err; ++ do_print_state = true; ++ continue; ++ } ++ ++ err = check_reference_leak(env); ++ if (err) ++ return err; ++ + /* eBPF calling convetion is such that R0 is used + * to return the value from eBPF program. + * Make sure that it's readable at this time + * of bpf_exit, which means that program wrote + * something into it earlier + */ +- err = check_reg_arg(regs, BPF_REG_0, SRC_OP); ++ err = check_reg_arg(env, BPF_REG_0, SRC_OP); + if (err) + return err; + + if (is_pointer_value(env, BPF_REG_0)) { +- verbose("R0 leaks addr as return value\n"); ++ verbose(env, "R0 leaks addr as return value\n"); + return -EACCES; + } + ++ err = check_return_code(env); ++ if (err) ++ return err; + process_bpf_exit: +- insn_idx = pop_stack(env, &prev_insn_idx); +- if (insn_idx < 0) { ++ update_branch_counts(env, env->cur_state); ++ err = pop_stack(env, &prev_insn_idx, ++ &env->insn_idx); ++ if (err < 0) { ++ if (err != -ENOENT) ++ return err; + break; + } else { + do_print_state = true; + continue; + } + } else { +- err = check_cond_jmp_op(env, insn, &insn_idx); ++ err = check_cond_jmp_op(env, insn, &env->insn_idx); + if (err) + return err; + } +@@ -1962,83 +8057,194 @@ process_bpf_exit: + if (err) + return err; + +- insn_idx++; ++ env->insn_idx++; ++ sanitize_mark_insn_seen(env); + } else { +- verbose("invalid BPF_LD mode\n"); ++ verbose(env, "invalid BPF_LD mode\n"); + return -EINVAL; + } + } else { +- verbose("unknown insn class %d\n", class); ++ verbose(env, "unknown insn class %d\n", class); + return -EINVAL; + } + +- insn_idx++; ++ env->insn_idx++; + } + ++ env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; + return 0; + } + ++static int check_map_prealloc(struct bpf_map *map) ++{ ++ return (map->map_type != BPF_MAP_TYPE_HASH && ++ map->map_type != BPF_MAP_TYPE_PERCPU_HASH && ++ map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) || ++ !(map->map_flags & BPF_F_NO_PREALLOC); ++} ++ ++static bool is_tracing_prog_type(enum bpf_prog_type type) ++{ ++ switch (type) { ++ case BPF_PROG_TYPE_KPROBE: ++ case BPF_PROG_TYPE_TRACEPOINT: ++ case BPF_PROG_TYPE_PERF_EVENT: ++ case BPF_PROG_TYPE_RAW_TRACEPOINT: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static int check_map_prog_compatibility(struct bpf_verifier_env *env, ++ struct bpf_map *map, ++ struct bpf_prog *prog) ++ ++{ ++ /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use ++ * preallocated hash maps, since doing memory allocation ++ * in overflow_handler can crash 
depending on where nmi got ++ * triggered. ++ */ ++ if (prog->type == BPF_PROG_TYPE_PERF_EVENT) { ++ if (!check_map_prealloc(map)) { ++ verbose(env, "perf_event programs can only use preallocated hash map\n"); ++ return -EINVAL; ++ } ++ if (map->inner_map_meta && ++ !check_map_prealloc(map->inner_map_meta)) { ++ verbose(env, "perf_event programs can only use preallocated inner hash map\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if ((is_tracing_prog_type(prog->type) || ++ prog->type == BPF_PROG_TYPE_SOCKET_FILTER) && ++ map_value_has_spin_lock(map)) { ++ verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static bool bpf_map_is_cgroup_storage(struct bpf_map *map) ++{ ++ return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || ++ map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); ++} ++ + /* look for pseudo eBPF instructions that access map FDs and + * replace them with actual map pointers + */ +-static int replace_map_fd_with_map_ptr(struct verifier_env *env) ++static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) + { + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; +- int i, j; ++ int i, j, err; ++ ++ err = bpf_prog_calc_tag(env->prog); ++ if (err) ++ return err; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (BPF_CLASS(insn->code) == BPF_LDX && + (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { +- verbose("BPF_LDX uses reserved fields\n"); ++ verbose(env, "BPF_LDX uses reserved fields\n"); + return -EINVAL; + } + + if (BPF_CLASS(insn->code) == BPF_STX && + ((BPF_MODE(insn->code) != BPF_MEM && + BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { +- verbose("BPF_STX uses reserved fields\n"); ++ verbose(env, "BPF_STX uses reserved fields\n"); + return -EINVAL; + } + + if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { ++ struct bpf_insn_aux_data *aux; + struct bpf_map *map; + struct fd f; ++ u64 addr; + + if (i == insn_cnt - 1 || insn[1].code != 0 || + insn[1].dst_reg != 0 || insn[1].src_reg != 0 || + insn[1].off != 0) { +- verbose("invalid bpf_ld_imm64 insn\n"); ++ verbose(env, "invalid bpf_ld_imm64 insn\n"); + return -EINVAL; + } + +- if (insn->src_reg == 0) ++ if (insn[0].src_reg == 0) + /* valid generic load 64-bit imm */ + goto next_insn; + +- if (insn->src_reg != BPF_PSEUDO_MAP_FD) { +- verbose("unrecognized bpf_ld_imm64 insn\n"); ++ /* In final convert_pseudo_ld_imm64() step, this is ++ * converted into regular 64-bit imm load insn. 
++ */ ++ if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && ++ insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || ++ (insn[0].src_reg == BPF_PSEUDO_MAP_FD && ++ insn[1].imm != 0)) { ++ verbose(env, ++ "unrecognized bpf_ld_imm64 insn\n"); + return -EINVAL; + } + +- f = fdget(insn->imm); ++ f = fdget(insn[0].imm); + map = __bpf_map_get(f); + if (IS_ERR(map)) { +- verbose("fd %d is not pointing to valid bpf_map\n", +- insn->imm); ++ verbose(env, "fd %d is not pointing to valid bpf_map\n", ++ insn[0].imm); + return PTR_ERR(map); + } + +- /* store map pointer inside BPF_LD_IMM64 instruction */ +- insn[0].imm = (u32) (unsigned long) map; +- insn[1].imm = ((u64) (unsigned long) map) >> 32; ++ err = check_map_prog_compatibility(env, map, env->prog); ++ if (err) { ++ fdput(f); ++ return err; ++ } ++ ++ aux = &env->insn_aux_data[i]; ++ if (insn->src_reg == BPF_PSEUDO_MAP_FD) { ++ addr = (unsigned long)map; ++ } else { ++ u32 off = insn[1].imm; ++ ++ if (off >= BPF_MAX_VAR_OFF) { ++ verbose(env, "direct value offset of %u is not allowed\n", off); ++ fdput(f); ++ return -EINVAL; ++ } ++ ++ if (!map->ops->map_direct_value_addr) { ++ verbose(env, "no direct value access support for this map type\n"); ++ fdput(f); ++ return -EINVAL; ++ } ++ ++ err = map->ops->map_direct_value_addr(map, &addr, off); ++ if (err) { ++ verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", ++ map->value_size, off); ++ fdput(f); ++ return err; ++ } ++ ++ aux->map_off = off; ++ addr += off; ++ } ++ ++ insn[0].imm = (u32)addr; ++ insn[1].imm = addr >> 32; + + /* check whether we recorded this map already */ +- for (j = 0; j < env->used_map_cnt; j++) ++ for (j = 0; j < env->used_map_cnt; j++) { + if (env->used_maps[j] == map) { ++ aux->map_index = j; + fdput(f); + goto next_insn; + } ++ } + + if (env->used_map_cnt >= MAX_USED_MAPS) { + fdput(f); +@@ -2048,19 +8254,31 @@ static int replace_map_fd_with_map_ptr(s + /* hold the map. If the program is rejected by verifier, + * the map will be released by release_maps() or it + * will be used by the valid program until it's unloaded +- * and all maps are released in free_bpf_prog_info() ++ * and all maps are released in free_used_maps() + */ + map = bpf_map_inc(map, false); + if (IS_ERR(map)) { + fdput(f); + return PTR_ERR(map); + } ++ ++ aux->map_index = env->used_map_cnt; + env->used_maps[env->used_map_cnt++] = map; + ++ if (bpf_map_is_cgroup_storage(map)) ++ return -EINVAL; ++ + fdput(f); + next_insn: + insn++; + i++; ++ continue; ++ } ++ ++ /* Basic sanity check before we invest more work here. */ ++ if (!bpf_opcode_in_insntable(insn->code)) { ++ verbose(env, "unknown opcode %02x\n", insn->code); ++ return -EINVAL; + } + } + +@@ -2072,7 +8290,7 @@ next_insn: + } + + /* drop refcnt of maps used by the rejected program */ +-static void release_maps(struct verifier_env *env) ++static void release_maps(struct bpf_verifier_env *env) + { + int i; + +@@ -2081,7 +8299,7 @@ static void release_maps(struct verifier + } + + /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ +-static void convert_pseudo_ld_imm64(struct verifier_env *env) ++static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) + { + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; +@@ -2092,201 +8310,1266 @@ static void convert_pseudo_ld_imm64(stru + insn->src_reg = 0; + } + +-static void adjust_branches(struct bpf_prog *prog, int pos, int delta) ++/* single env->prog->insni[off] instruction was replaced with the range ++ * insni[off, off + cnt). 
Adjust corresponding insn_aux_data by copying ++ * [0, off) and [off, end) to new locations, so the patched range stays zero ++ */ ++static int adjust_insn_aux_data(struct bpf_verifier_env *env, ++ struct bpf_prog *new_prog, u32 off, u32 cnt) + { +- struct bpf_insn *insn = prog->insnsi; +- int insn_cnt = prog->len; ++ struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; ++ struct bpf_insn *insn = new_prog->insnsi; ++ bool old_seen = old_data[off].seen; ++ u32 prog_len; + int i; + +- for (i = 0; i < insn_cnt; i++, insn++) { +- if (BPF_CLASS(insn->code) != BPF_JMP || +- BPF_OP(insn->code) == BPF_CALL || +- BPF_OP(insn->code) == BPF_EXIT) ++ /* aux info at OFF always needs adjustment, no matter fast path ++ * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the ++ * original insn at old prog. ++ */ ++ old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1); ++ ++ if (cnt == 1) ++ return 0; ++ prog_len = new_prog->len; ++ new_data = vzalloc(array_size(prog_len, ++ sizeof(struct bpf_insn_aux_data))); ++ if (!new_data) ++ return -ENOMEM; ++ memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); ++ memcpy(new_data + off + cnt - 1, old_data + off, ++ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); ++ for (i = off; i < off + cnt - 1; i++) { ++ /* Expand insni[off]'s seen count to the patched range. */ ++ new_data[i].seen = old_seen; ++ new_data[i].zext_dst = insn_has_def32(env, insn + i); ++ } ++ env->insn_aux_data = new_data; ++ vfree(old_data); ++ return 0; ++} ++ ++static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) ++{ ++ int i; ++ ++ if (len == 1) ++ return; ++ /* NOTE: fake 'exit' subprog should be updated as well. */ ++ for (i = 0; i <= env->subprog_cnt; i++) { ++ if (env->subprog_info[i].start <= off) + continue; ++ env->subprog_info[i].start += len - 1; ++ } ++} ++ ++static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, ++ const struct bpf_insn *patch, u32 len) ++{ ++ struct bpf_prog *new_prog; ++ ++ new_prog = bpf_patch_insn_single(env->prog, off, patch, len); ++ if (IS_ERR(new_prog)) { ++ if (PTR_ERR(new_prog) == -ERANGE) ++ verbose(env, ++ "insn %d cannot be patched due to 16-bit range\n", ++ env->insn_aux_data[off].orig_idx); ++ return NULL; ++ } ++ if (adjust_insn_aux_data(env, new_prog, off, len)) ++ return NULL; ++ adjust_subprog_starts(env, off, len); ++ return new_prog; ++} + +- /* adjust offset of jmps if necessary */ +- if (i < pos && i + insn->off + 1 > pos) +- insn->off += delta; +- else if (i > pos + delta && i + insn->off + 1 <= pos + delta) +- insn->off -= delta; ++static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, ++ u32 off, u32 cnt) ++{ ++ int i, j; ++ ++ /* find first prog starting at or after off (first to remove) */ ++ for (i = 0; i < env->subprog_cnt; i++) ++ if (env->subprog_info[i].start >= off) ++ break; ++ /* find first prog starting at or after off + cnt (first to stay) */ ++ for (j = i; j < env->subprog_cnt; j++) ++ if (env->subprog_info[j].start >= off + cnt) ++ break; ++ /* if j doesn't start exactly at off + cnt, we are just removing ++ * the front of previous prog ++ */ ++ if (env->subprog_info[j].start != off + cnt) ++ j--; ++ ++ if (j > i) { ++ struct bpf_prog_aux *aux = env->prog->aux; ++ int move; ++ ++ /* move fake 'exit' subprog as well */ ++ move = env->subprog_cnt + 1 - j; ++ ++ memmove(env->subprog_info + i, ++ env->subprog_info + j, ++ sizeof(*env->subprog_info) * move); ++ env->subprog_cnt -= j 
- i; ++ ++ /* remove func_info */ ++ if (aux->func_info) { ++ move = aux->func_info_cnt - j; ++ ++ memmove(aux->func_info + i, ++ aux->func_info + j, ++ sizeof(*aux->func_info) * move); ++ aux->func_info_cnt -= j - i; ++ /* func_info->insn_off is set after all code rewrites, ++ * in adjust_btf_func() - no need to adjust ++ */ ++ } ++ } else { ++ /* convert i from "first prog to remove" to "first to adjust" */ ++ if (env->subprog_info[i].start == off) ++ i++; + } ++ ++ /* update fake 'exit' subprog as well */ ++ for (; i <= env->subprog_cnt; i++) ++ env->subprog_info[i].start -= cnt; ++ ++ return 0; ++} ++ ++static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, ++ u32 cnt) ++{ ++ struct bpf_prog *prog = env->prog; ++ u32 i, l_off, l_cnt, nr_linfo; ++ struct bpf_line_info *linfo; ++ ++ nr_linfo = prog->aux->nr_linfo; ++ if (!nr_linfo) ++ return 0; ++ ++ linfo = prog->aux->linfo; ++ ++ /* find first line info to remove, count lines to be removed */ ++ for (i = 0; i < nr_linfo; i++) ++ if (linfo[i].insn_off >= off) ++ break; ++ ++ l_off = i; ++ l_cnt = 0; ++ for (; i < nr_linfo; i++) ++ if (linfo[i].insn_off < off + cnt) ++ l_cnt++; ++ else ++ break; ++ ++ /* First live insn doesn't match first live linfo, it needs to "inherit" ++ * last removed linfo. prog is already modified, so prog->len == off ++ * means no live instructions after (tail of the program was removed). ++ */ ++ if (prog->len != off && l_cnt && ++ (i == nr_linfo || linfo[i].insn_off != off + cnt)) { ++ l_cnt--; ++ linfo[--i].insn_off = off + cnt; ++ } ++ ++ /* remove the line info which refer to the removed instructions */ ++ if (l_cnt) { ++ memmove(linfo + l_off, linfo + i, ++ sizeof(*linfo) * (nr_linfo - i)); ++ ++ prog->aux->nr_linfo -= l_cnt; ++ nr_linfo = prog->aux->nr_linfo; ++ } ++ ++ /* pull all linfo[i].insn_off >= off + cnt in by cnt */ ++ for (i = l_off; i < nr_linfo; i++) ++ linfo[i].insn_off -= cnt; ++ ++ /* fix up all subprogs (incl. 'exit') which start >= off */ ++ for (i = 0; i <= env->subprog_cnt; i++) ++ if (env->subprog_info[i].linfo_idx > l_off) { ++ /* program may have started in the removed region but ++ * may not be fully removed ++ */ ++ if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) ++ env->subprog_info[i].linfo_idx -= l_cnt; ++ else ++ env->subprog_info[i].linfo_idx = l_off; ++ } ++ ++ return 0; ++} ++ ++static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) ++{ ++ struct bpf_insn_aux_data *aux_data = env->insn_aux_data; ++ unsigned int orig_prog_len = env->prog->len; ++ int err; ++ ++ err = bpf_remove_insns(env->prog, off, cnt); ++ if (err) ++ return err; ++ ++ err = adjust_subprog_starts_after_remove(env, off, cnt); ++ if (err) ++ return err; ++ ++ err = bpf_adj_linfo_after_remove(env, off, cnt); ++ if (err) ++ return err; ++ ++ memmove(aux_data + off, aux_data + off + cnt, ++ sizeof(*aux_data) * (orig_prog_len - off - cnt)); ++ ++ return 0; + } + +-/* convert load instructions that access fields of 'struct __sk_buff' +- * into sequence of instructions that access fields of 'struct sk_buff' ++/* The verifier does more data flow analysis than llvm and will not ++ * explore branches that are dead at run time. Malicious programs can ++ * have dead code too. Therefore replace all dead at-run-time code ++ * with 'ja -1'. ++ * ++ * Just nops are not optimal, e.g. if they would sit at the end of the ++ * program and through another bug we would manage to jump there, then ++ * we'd execute beyond program memory otherwise. 
Returning exception ++ * code also wouldn't work since we can have subprogs where the dead ++ * code could be located. + */ +-static int convert_ctx_accesses(struct verifier_env *env) ++static void sanitize_dead_code(struct bpf_verifier_env *env) + { ++ struct bpf_insn_aux_data *aux_data = env->insn_aux_data; ++ struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1); + struct bpf_insn *insn = env->prog->insnsi; ++ const int insn_cnt = env->prog->len; ++ int i; ++ ++ for (i = 0; i < insn_cnt; i++) { ++ if (aux_data[i].seen) ++ continue; ++ memcpy(insn + i, &trap, sizeof(trap)); ++ } ++} ++ ++static bool insn_is_cond_jump(u8 code) ++{ ++ u8 op; ++ ++ if (BPF_CLASS(code) == BPF_JMP32) ++ return true; ++ ++ if (BPF_CLASS(code) != BPF_JMP) ++ return false; ++ ++ op = BPF_OP(code); ++ return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; ++} ++ ++static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) ++{ ++ struct bpf_insn_aux_data *aux_data = env->insn_aux_data; ++ struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); ++ struct bpf_insn *insn = env->prog->insnsi; ++ const int insn_cnt = env->prog->len; ++ int i; ++ ++ for (i = 0; i < insn_cnt; i++, insn++) { ++ if (!insn_is_cond_jump(insn->code)) ++ continue; ++ ++ if (!aux_data[i + 1].seen) ++ ja.off = insn->off; ++ else if (!aux_data[i + 1 + insn->off].seen) ++ ja.off = 0; ++ else ++ continue; ++ ++ memcpy(insn, &ja, sizeof(ja)); ++ } ++} ++ ++static int opt_remove_dead_code(struct bpf_verifier_env *env) ++{ ++ struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + int insn_cnt = env->prog->len; +- struct bpf_insn insn_buf[16]; ++ int i, err; ++ ++ for (i = 0; i < insn_cnt; i++) { ++ int j; ++ ++ j = 0; ++ while (i + j < insn_cnt && !aux_data[i + j].seen) ++ j++; ++ if (!j) ++ continue; ++ ++ err = verifier_remove_insns(env, i, j); ++ if (err) ++ return err; ++ insn_cnt = env->prog->len; ++ } ++ ++ return 0; ++} ++ ++static int opt_remove_nops(struct bpf_verifier_env *env) ++{ ++ const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); ++ struct bpf_insn *insn = env->prog->insnsi; ++ int insn_cnt = env->prog->len; ++ int i, err; ++ ++ for (i = 0; i < insn_cnt; i++) { ++ if (memcmp(&insn[i], &ja, sizeof(ja))) ++ continue; ++ ++ err = verifier_remove_insns(env, i, 1); ++ if (err) ++ return err; ++ insn_cnt--; ++ i--; ++ } ++ ++ return 0; ++} ++ ++static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, ++ const union bpf_attr *attr) ++{ ++ struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4]; ++ struct bpf_insn_aux_data *aux = env->insn_aux_data; ++ int i, patch_len, delta = 0, len = env->prog->len; ++ struct bpf_insn *insns = env->prog->insnsi; ++ struct bpf_prog *new_prog; ++ bool rnd_hi32; ++ ++ rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32; ++ zext_patch[1] = BPF_ZEXT_REG(0); ++ rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0); ++ rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); ++ rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX); ++ for (i = 0; i < len; i++) { ++ int adj_idx = i + delta; ++ struct bpf_insn insn; ++ ++ insn = insns[adj_idx]; ++ if (!aux[adj_idx].zext_dst) { ++ u8 code, class; ++ u32 imm_rnd; ++ ++ if (!rnd_hi32) ++ continue; ++ ++ code = insn.code; ++ class = BPF_CLASS(code); ++ if (insn_no_def(&insn)) ++ continue; ++ ++ /* NOTE: arg "reg" (the fourth one) is only used for ++ * BPF_STX which has been ruled out in above ++ * check, it is safe to pass NULL here. 
++ */ ++ if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) { ++ if (class == BPF_LD && ++ BPF_MODE(code) == BPF_IMM) ++ i++; ++ continue; ++ } ++ ++ /* ctx load could be transformed into wider load. */ ++ if (class == BPF_LDX && ++ aux[adj_idx].ptr_type == PTR_TO_CTX) ++ continue; ++ ++ imm_rnd = get_random_int(); ++ rnd_hi32_patch[0] = insn; ++ rnd_hi32_patch[1].imm = imm_rnd; ++ rnd_hi32_patch[3].dst_reg = insn.dst_reg; ++ patch = rnd_hi32_patch; ++ patch_len = 4; ++ goto apply_patch_buffer; ++ } ++ ++ if (!bpf_jit_needs_zext()) ++ continue; ++ ++ zext_patch[0] = insn; ++ zext_patch[1].dst_reg = insn.dst_reg; ++ zext_patch[1].src_reg = insn.dst_reg; ++ patch = zext_patch; ++ patch_len = 2; ++apply_patch_buffer: ++ new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len); ++ if (!new_prog) ++ return -ENOMEM; ++ env->prog = new_prog; ++ insns = new_prog->insnsi; ++ aux = env->insn_aux_data; ++ delta += patch_len - 1; ++ } ++ ++ return 0; ++} ++ ++/* convert load instructions that access fields of a context type into a ++ * sequence of instructions that access fields of the underlying structure: ++ * struct __sk_buff -> struct sk_buff ++ * struct bpf_sock_ops -> struct sock ++ */ ++static int convert_ctx_accesses(struct bpf_verifier_env *env) ++{ ++ const struct bpf_verifier_ops *ops = env->ops; ++ int i, cnt, size, ctx_field_size, delta = 0; ++ const int insn_cnt = env->prog->len; ++ struct bpf_insn insn_buf[16], *insn; ++ u32 target_size, size_default, off; + struct bpf_prog *new_prog; +- u32 cnt; +- int i; + enum bpf_access_type type; ++ bool is_narrower_load; ++ ++ if (ops->gen_prologue || env->seen_direct_write) { ++ if (!ops->gen_prologue) { ++ verbose(env, "bpf verifier is misconfigured\n"); ++ return -EINVAL; ++ } ++ cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, ++ env->prog); ++ if (cnt >= ARRAY_SIZE(insn_buf)) { ++ verbose(env, "bpf verifier is misconfigured\n"); ++ return -EINVAL; ++ } else if (cnt) { ++ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ env->prog = new_prog; ++ delta += cnt - 1; ++ } ++ } + +- if (!env->prog->aux->ops->convert_ctx_access) ++ if (bpf_prog_is_dev_bound(env->prog->aux)) + return 0; + ++ insn = env->prog->insnsi + delta; ++ + for (i = 0; i < insn_cnt; i++, insn++) { +- if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) ++ bpf_convert_ctx_access_t convert_ctx_access; ++ ++ if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || ++ insn->code == (BPF_LDX | BPF_MEM | BPF_H) || ++ insn->code == (BPF_LDX | BPF_MEM | BPF_W) || ++ insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) + type = BPF_READ; +- else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) ++ else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || ++ insn->code == (BPF_STX | BPF_MEM | BPF_H) || ++ insn->code == (BPF_STX | BPF_MEM | BPF_W) || ++ insn->code == (BPF_STX | BPF_MEM | BPF_DW)) + type = BPF_WRITE; + else + continue; + +- if (insn->imm != PTR_TO_CTX) { +- /* clear internal mark */ +- insn->imm = 0; ++ if (type == BPF_WRITE && ++ env->insn_aux_data[i + delta].sanitize_stack_off) { ++ struct bpf_insn patch[] = { ++ /* Sanitize suspicious stack slot with zero. 
++ * There are no memory dependencies for this store, ++ * since it's only using frame pointer and immediate ++ * constant of zero ++ */ ++ BPF_ST_MEM(BPF_DW, BPF_REG_FP, ++ env->insn_aux_data[i + delta].sanitize_stack_off, ++ 0), ++ /* the original STX instruction will immediately ++ * overwrite the same stack slot with appropriate value ++ */ ++ *insn, ++ }; ++ ++ cnt = ARRAY_SIZE(patch); ++ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ switch (env->insn_aux_data[i + delta].ptr_type) { ++ case PTR_TO_CTX: ++ if (!ops->convert_ctx_access) ++ continue; ++ convert_ctx_access = ops->convert_ctx_access; ++ break; ++ default: + continue; + } + +- cnt = env->prog->aux->ops-> +- convert_ctx_access(type, insn->dst_reg, insn->src_reg, +- insn->off, insn_buf, env->prog); +- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { +- verbose("bpf verifier is misconfigured\n"); ++ ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; ++ size = BPF_LDST_BYTES(insn); ++ ++ /* If the read access is a narrower load of the field, ++ * convert to a 4/8-byte load, to minimum program type specific ++ * convert_ctx_access changes. If conversion is successful, ++ * we will apply proper mask to the result. ++ */ ++ is_narrower_load = size < ctx_field_size; ++ size_default = bpf_ctx_off_adjust_machine(ctx_field_size); ++ off = insn->off; ++ if (is_narrower_load) { ++ u8 size_code; ++ ++ if (type == BPF_WRITE) { ++ verbose(env, "bpf verifier narrow ctx access misconfigured\n"); ++ return -EINVAL; ++ } ++ ++ size_code = BPF_H; ++ if (ctx_field_size == 4) ++ size_code = BPF_W; ++ else if (ctx_field_size == 8) ++ size_code = BPF_DW; ++ ++ insn->off = off & ~(size_default - 1); ++ insn->code = BPF_LDX | BPF_MEM | size_code; ++ } ++ ++ target_size = 0; ++ cnt = convert_ctx_access(type, insn, insn_buf, env->prog, ++ &target_size); ++ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || ++ (ctx_field_size && !target_size)) { ++ verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } + +- if (cnt == 1) { +- memcpy(insn, insn_buf, sizeof(*insn)); +- continue; ++ if (is_narrower_load && size < target_size) { ++ u8 shift = bpf_ctx_narrow_access_offset( ++ off, size, size_default) * 8; ++ if (ctx_field_size <= 4) { ++ if (shift) ++ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, ++ insn->dst_reg, ++ shift); ++ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, ++ (1 << size * 8) - 1); ++ } else { ++ if (shift) ++ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, ++ insn->dst_reg, ++ shift); ++ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, ++ (1ULL << size * 8) - 1); ++ } + } + +- /* several new insns need to be inserted. 
Make room for them */ +- insn_cnt += cnt - 1; +- new_prog = bpf_prog_realloc(env->prog, +- bpf_prog_size(insn_cnt), +- GFP_USER); ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + +- new_prog->len = insn_cnt; ++ delta += cnt - 1; + +- memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1, +- sizeof(*insn) * (insn_cnt - i - cnt)); ++ /* keep walking new program and skip insns we just inserted */ ++ env->prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ } + +- /* copy substitute insns in place of load instruction */ +- memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt); ++ return 0; ++} + +- /* adjust branches in the whole program */ +- adjust_branches(new_prog, i, cnt - 1); ++static int jit_subprogs(struct bpf_verifier_env *env) ++{ ++ struct bpf_prog *prog = env->prog, **func, *tmp; ++ int i, j, subprog_start, subprog_end = 0, len, subprog; ++ struct bpf_insn *insn; ++ void *old_bpf_func; ++ int err; + +- /* keep walking new program and skip insns we just inserted */ +- env->prog = new_prog; +- insn = new_prog->insnsi + i + cnt - 1; +- i += cnt - 1; ++ if (env->subprog_cnt <= 1) ++ return 0; ++ ++ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { ++ if (insn->code != (BPF_JMP | BPF_CALL) || ++ insn->src_reg != BPF_PSEUDO_CALL) ++ continue; ++ /* Upon error here we cannot fall back to interpreter but ++ * need a hard reject of the program. Thus -EFAULT is ++ * propagated in any case. ++ */ ++ subprog = find_subprog(env, i + insn->imm + 1); ++ if (subprog < 0) { ++ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", ++ i + insn->imm + 1); ++ return -EFAULT; ++ } ++ /* temporarily remember subprog id inside insn instead of ++ * aux_data, since next loop will split up all insns into funcs ++ */ ++ insn->off = subprog; ++ /* remember original imm in case JIT fails and fallback ++ * to interpreter will be needed ++ */ ++ env->insn_aux_data[i].call_imm = insn->imm; ++ /* point imm to __bpf_call_base+1 from JITs point of view */ ++ insn->imm = 1; ++ } ++ ++ err = bpf_prog_alloc_jited_linfo(prog); ++ if (err) ++ goto out_undo_insn; ++ ++ err = -ENOMEM; ++ func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL); ++ if (!func) ++ goto out_undo_insn; ++ ++ for (i = 0; i < env->subprog_cnt; i++) { ++ subprog_start = subprog_end; ++ subprog_end = env->subprog_info[i + 1].start; ++ ++ len = subprog_end - subprog_start; ++ /* BPF_PROG_RUN doesn't call subprogs directly, ++ * hence main prog stats include the runtime of subprogs. ++ * subprogs don't have IDs and not reachable via prog_get_next_id ++ * func[i]->aux->stats will never be accessed and stays NULL ++ */ ++ func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); ++ if (!func[i]) ++ goto out_free; ++ memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], ++ len * sizeof(struct bpf_insn)); ++ func[i]->type = prog->type; ++ func[i]->len = len; ++ if (bpf_prog_calc_tag(func[i])) ++ goto out_free; ++ func[i]->is_func = 1; ++ func[i]->aux->func_idx = i; ++ /* the btf and func_info will be freed only at prog->aux */ ++ func[i]->aux->btf = prog->aux->btf; ++ func[i]->aux->func_info = prog->aux->func_info; ++ ++ /* Use bpf_prog_F_tag to indicate functions in stack traces. 
++ * Long term would need debug info to populate names ++ */ ++ func[i]->aux->name[0] = 'F'; ++ func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; ++ func[i]->jit_requested = 1; ++ func[i]->aux->linfo = prog->aux->linfo; ++ func[i]->aux->nr_linfo = prog->aux->nr_linfo; ++ func[i]->aux->jited_linfo = prog->aux->jited_linfo; ++ func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; ++ func[i] = bpf_int_jit_compile(func[i]); ++ if (!func[i]->jited) { ++ err = -ENOTSUPP; ++ goto out_free; ++ } ++ cond_resched(); ++ } ++ /* at this point all bpf functions were successfully JITed ++ * now populate all bpf_calls with correct addresses and ++ * run last pass of JIT ++ */ ++ for (i = 0; i < env->subprog_cnt; i++) { ++ insn = func[i]->insnsi; ++ for (j = 0; j < func[i]->len; j++, insn++) { ++ if (insn->code != (BPF_JMP | BPF_CALL) || ++ insn->src_reg != BPF_PSEUDO_CALL) ++ continue; ++ subprog = insn->off; ++ insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - ++ __bpf_call_base; ++ } ++ ++ /* we use the aux data to keep a list of the start addresses ++ * of the JITed images for each function in the program ++ * ++ * for some architectures, such as powerpc64, the imm field ++ * might not be large enough to hold the offset of the start ++ * address of the callee's JITed image from __bpf_call_base ++ * ++ * in such cases, we can lookup the start address of a callee ++ * by using its subprog id, available from the off field of ++ * the call instruction, as an index for this list ++ */ ++ func[i]->aux->func = func; ++ func[i]->aux->func_cnt = env->subprog_cnt; ++ } ++ for (i = 0; i < env->subprog_cnt; i++) { ++ old_bpf_func = func[i]->bpf_func; ++ tmp = bpf_int_jit_compile(func[i]); ++ if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { ++ verbose(env, "JIT doesn't support bpf-to-bpf calls\n"); ++ err = -ENOTSUPP; ++ goto out_free; ++ } ++ cond_resched(); ++ } ++ ++ /* finally lock prog and jit images for all functions and ++ * populate kallsysm ++ */ ++ for (i = 0; i < env->subprog_cnt; i++) { ++ bpf_prog_lock_ro(func[i]); ++ bpf_prog_kallsyms_add(func[i]); ++ } ++ ++ /* Last step: make now unused interpreter insns from main ++ * prog consistent for later dump requests, so they can ++ * later look the same as if they were interpreted only. 
++ */ ++ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { ++ if (insn->code != (BPF_JMP | BPF_CALL) || ++ insn->src_reg != BPF_PSEUDO_CALL) ++ continue; ++ insn->off = env->insn_aux_data[i].call_imm; ++ subprog = find_subprog(env, i + insn->off + 1); ++ insn->imm = subprog; + } + ++ prog->jited = 1; ++ prog->bpf_func = func[0]->bpf_func; ++ prog->aux->func = func; ++ prog->aux->func_cnt = env->subprog_cnt; ++ bpf_prog_free_unused_jited_linfo(prog); + return 0; ++out_free: ++ for (i = 0; i < env->subprog_cnt; i++) ++ if (func[i]) ++ bpf_jit_free(func[i]); ++ kfree(func); ++out_undo_insn: ++ /* cleanup main prog to be interpreted */ ++ prog->jit_requested = 0; ++ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { ++ if (insn->code != (BPF_JMP | BPF_CALL) || ++ insn->src_reg != BPF_PSEUDO_CALL) ++ continue; ++ insn->off = 0; ++ insn->imm = env->insn_aux_data[i].call_imm; ++ } ++ bpf_prog_free_jited_linfo(prog); ++ return err; + } + +-static void free_states(struct verifier_env *env) ++static int fixup_call_args(struct bpf_verifier_env *env) + { +- struct verifier_state_list *sl, *sln; ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON ++ struct bpf_prog *prog = env->prog; ++ struct bpf_insn *insn = prog->insnsi; ++ int i, depth; ++#endif ++ int err = 0; ++ ++ if (env->prog->jit_requested && ++ !bpf_prog_is_dev_bound(env->prog->aux)) { ++ err = jit_subprogs(env); ++ if (err == 0) ++ return 0; ++ if (err == -EFAULT) ++ return err; ++ } ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON ++ for (i = 0; i < prog->len; i++, insn++) { ++ if (insn->code != (BPF_JMP | BPF_CALL) || ++ insn->src_reg != BPF_PSEUDO_CALL) ++ continue; ++ depth = get_callee_stack_depth(env, insn, i); ++ if (depth < 0) ++ return depth; ++ bpf_patch_call_args(insn, depth); ++ } ++ err = 0; ++#endif ++ return err; ++} ++ ++/* fixup insn->imm field of bpf_call instructions ++ * and inline eligible helpers as explicit sequence of BPF instructions ++ * ++ * this function is called after eBPF program passed verification ++ */ ++static int fixup_bpf_calls(struct bpf_verifier_env *env) ++{ ++ struct bpf_prog *prog = env->prog; ++ struct bpf_insn *insn = prog->insnsi; ++ const struct bpf_func_proto *fn; ++ const int insn_cnt = prog->len; ++ const struct bpf_map_ops *ops; ++ struct bpf_insn_aux_data *aux; ++ struct bpf_insn insn_buf[16]; ++ struct bpf_prog *new_prog; ++ struct bpf_map *map_ptr; ++ int i, cnt, delta = 0; ++ ++ for (i = 0; i < insn_cnt; i++, insn++) { ++ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || ++ insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || ++ insn->code == (BPF_ALU | BPF_MOD | BPF_X) || ++ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { ++ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; ++ bool isdiv = BPF_OP(insn->code) == BPF_DIV; ++ struct bpf_insn *patchlet; ++ struct bpf_insn chk_and_div[] = { ++ /* [R,W]x div 0 -> 0 */ ++ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | ++ BPF_JNE | BPF_K, insn->src_reg, ++ 0, 2, 0), ++ BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg), ++ BPF_JMP_IMM(BPF_JA, 0, 0, 1), ++ *insn, ++ }; ++ struct bpf_insn chk_and_mod[] = { ++ /* [R,W]x mod 0 -> [R,W]x */ ++ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | ++ BPF_JEQ | BPF_K, insn->src_reg, ++ 0, 1 + (is64 ? 0 : 1), 0), ++ *insn, ++ BPF_JMP_IMM(BPF_JA, 0, 0, 1), ++ BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), ++ }; ++ ++ patchlet = isdiv ? chk_and_div : chk_and_mod; ++ cnt = isdiv ? ARRAY_SIZE(chk_and_div) : ++ ARRAY_SIZE(chk_and_mod) - (is64 ? 
2 : 0); ++ ++ new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ if (BPF_CLASS(insn->code) == BPF_LD && ++ (BPF_MODE(insn->code) == BPF_ABS || ++ BPF_MODE(insn->code) == BPF_IND)) { ++ cnt = env->ops->gen_ld_abs(insn, insn_buf); ++ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { ++ verbose(env, "bpf verifier is misconfigured\n"); ++ return -EINVAL; ++ } ++ ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || ++ insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { ++ const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; ++ const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; ++ struct bpf_insn insn_buf[16]; ++ struct bpf_insn *patch = &insn_buf[0]; ++ bool issrc, isneg, isimm; ++ u32 off_reg; ++ ++ aux = &env->insn_aux_data[i + delta]; ++ if (!aux->alu_state || ++ aux->alu_state == BPF_ALU_NON_POINTER) ++ continue; ++ ++ isneg = aux->alu_state & BPF_ALU_NEG_VALUE; ++ issrc = (aux->alu_state & BPF_ALU_SANITIZE) == ++ BPF_ALU_SANITIZE_SRC; ++ isimm = aux->alu_state & BPF_ALU_IMMEDIATE; ++ ++ off_reg = issrc ? insn->src_reg : insn->dst_reg; ++ if (isimm) { ++ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); ++ } else { ++ if (isneg) ++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); ++ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); ++ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); ++ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); ++ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); ++ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); ++ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg); ++ } ++ if (!issrc) ++ *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg); ++ insn->src_reg = BPF_REG_AX; ++ if (isneg) ++ insn->code = insn->code == code_add ? ++ code_sub : code_add; ++ *patch++ = *insn; ++ if (issrc && isneg && !isimm) ++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); ++ cnt = patch - insn_buf; ++ ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ if (insn->code != (BPF_JMP | BPF_CALL)) ++ continue; ++ if (insn->src_reg == BPF_PSEUDO_CALL) ++ continue; ++ ++ if (insn->imm == BPF_FUNC_get_route_realm) ++ prog->dst_needed = 1; ++ if (insn->imm == BPF_FUNC_get_prandom_u32) ++ bpf_user_rnd_init_once(); ++ if (insn->imm == BPF_FUNC_override_return) ++ prog->kprobe_override = 1; ++ if (insn->imm == BPF_FUNC_tail_call) { ++ /* If we tail call into other programs, we ++ * cannot make any assumptions since they can ++ * be replaced dynamically during runtime in ++ * the program array. 
++ */ ++ prog->cb_access = 1; ++ env->prog->aux->stack_depth = MAX_BPF_STACK; ++ env->prog->aux->max_pkt_offset = MAX_PACKET_OFF; ++ ++ /* mark bpf_tail_call as different opcode to avoid ++ * conditional branch in the interpeter for every normal ++ * call and to prevent accidental JITing by JIT compiler ++ * that doesn't support bpf_tail_call yet ++ */ ++ insn->imm = 0; ++ insn->code = BPF_JMP | BPF_TAIL_CALL; ++ ++ aux = &env->insn_aux_data[i + delta]; ++ if (!bpf_map_ptr_unpriv(aux)) ++ continue; ++ ++ /* instead of changing every JIT dealing with tail_call ++ * emit two extra insns: ++ * if (index >= max_entries) goto out; ++ * index &= array->index_mask; ++ * to avoid out-of-bounds cpu speculation ++ */ ++ if (bpf_map_ptr_poisoned(aux)) { ++ verbose(env, "tail_call abusing map_ptr\n"); ++ return -EINVAL; ++ } ++ ++ map_ptr = BPF_MAP_PTR(aux->map_state); ++ insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, ++ map_ptr->max_entries, 2); ++ insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, ++ container_of(map_ptr, ++ struct bpf_array, ++ map)->index_mask); ++ insn_buf[2] = *insn; ++ cnt = 3; ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup ++ * and other inlining handlers are currently limited to 64 bit ++ * only. ++ */ ++ if (prog->jit_requested && BITS_PER_LONG == 64 && ++ (insn->imm == BPF_FUNC_map_lookup_elem || ++ insn->imm == BPF_FUNC_map_update_elem || ++ insn->imm == BPF_FUNC_map_delete_elem || ++ insn->imm == BPF_FUNC_map_push_elem || ++ insn->imm == BPF_FUNC_map_pop_elem || ++ insn->imm == BPF_FUNC_map_peek_elem)) { ++ aux = &env->insn_aux_data[i + delta]; ++ if (bpf_map_ptr_poisoned(aux)) ++ goto patch_call_imm; ++ ++ map_ptr = BPF_MAP_PTR(aux->map_state); ++ ops = map_ptr->ops; ++ if (insn->imm == BPF_FUNC_map_lookup_elem && ++ ops->map_gen_lookup) { ++ cnt = ops->map_gen_lookup(map_ptr, insn_buf); ++ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { ++ verbose(env, "bpf verifier is misconfigured\n"); ++ return -EINVAL; ++ } ++ ++ new_prog = bpf_patch_insn_data(env, i + delta, ++ insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, ++ (void *(*)(struct bpf_map *map, void *key))NULL)); ++ BUILD_BUG_ON(!__same_type(ops->map_delete_elem, ++ (int (*)(struct bpf_map *map, void *key))NULL)); ++ BUILD_BUG_ON(!__same_type(ops->map_update_elem, ++ (int (*)(struct bpf_map *map, void *key, void *value, ++ u64 flags))NULL)); ++ BUILD_BUG_ON(!__same_type(ops->map_push_elem, ++ (int (*)(struct bpf_map *map, void *value, ++ u64 flags))NULL)); ++ BUILD_BUG_ON(!__same_type(ops->map_pop_elem, ++ (int (*)(struct bpf_map *map, void *value))NULL)); ++ BUILD_BUG_ON(!__same_type(ops->map_peek_elem, ++ (int (*)(struct bpf_map *map, void *value))NULL)); ++ ++ switch (insn->imm) { ++ case BPF_FUNC_map_lookup_elem: ++ insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - ++ __bpf_call_base; ++ continue; ++ case BPF_FUNC_map_update_elem: ++ insn->imm = BPF_CAST_CALL(ops->map_update_elem) - ++ __bpf_call_base; ++ continue; ++ case BPF_FUNC_map_delete_elem: ++ insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - ++ __bpf_call_base; ++ continue; ++ case BPF_FUNC_map_push_elem: ++ insn->imm = BPF_CAST_CALL(ops->map_push_elem) - 
++ __bpf_call_base; ++ continue; ++ case BPF_FUNC_map_pop_elem: ++ insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - ++ __bpf_call_base; ++ continue; ++ case BPF_FUNC_map_peek_elem: ++ insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - ++ __bpf_call_base; ++ continue; ++ } ++ ++ goto patch_call_imm; ++ } ++ ++patch_call_imm: ++ fn = env->ops->get_func_proto(insn->imm, env->prog); ++ /* all functions that have prototype and verifier allowed ++ * programs to call them, must be real in-kernel functions ++ */ ++ if (!fn->func) { ++ verbose(env, ++ "kernel subsystem misconfigured func %s#%d\n", ++ func_id_name(insn->imm), insn->imm); ++ return -EFAULT; ++ } ++ insn->imm = fn->func - __bpf_call_base; ++ } ++ ++ return 0; ++} ++ ++static void free_states(struct bpf_verifier_env *env) ++{ ++ struct bpf_verifier_state_list *sl, *sln; + int i; + ++ sl = env->free_list; ++ while (sl) { ++ sln = sl->next; ++ free_verifier_state(&sl->state, false); ++ kfree(sl); ++ sl = sln; ++ } ++ + if (!env->explored_states) + return; + +- for (i = 0; i < env->prog->len; i++) { ++ for (i = 0; i < state_htab_size(env); i++) { + sl = env->explored_states[i]; + +- if (sl) +- while (sl != STATE_LIST_MARK) { +- sln = sl->next; +- kfree(sl); +- sl = sln; +- } ++ while (sl) { ++ sln = sl->next; ++ free_verifier_state(&sl->state, false); ++ kfree(sl); ++ sl = sln; ++ } + } + +- kfree(env->explored_states); ++ kvfree(env->explored_states); + } + +-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) ++static void print_verification_stats(struct bpf_verifier_env *env) + { +- char __user *log_ubuf = NULL; +- struct verifier_env *env; +- int ret = -EINVAL; ++ int i; + +- if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS) +- return -E2BIG; ++ if (env->log.level & BPF_LOG_STATS) { ++ verbose(env, "verification time %lld usec\n", ++ div_u64(env->verification_time, 1000)); ++ verbose(env, "stack depth "); ++ for (i = 0; i < env->subprog_cnt; i++) { ++ u32 depth = env->subprog_info[i].stack_depth; ++ ++ verbose(env, "%d", depth); ++ if (i + 1 < env->subprog_cnt) ++ verbose(env, "+"); ++ } ++ verbose(env, "\n"); ++ } ++ verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " ++ "total_states %d peak_states %d mark_read %d\n", ++ env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, ++ env->max_states_per_insn, env->total_states, ++ env->peak_states, env->longest_mark_read_walk); ++} ++ ++int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, ++ union bpf_attr __user *uattr) ++{ ++ u64 start_time = ktime_get_ns(); ++ struct bpf_verifier_env *env; ++ struct bpf_verifier_log *log; ++ int i, len, ret = -EINVAL; ++ bool is_priv; + +- /* 'struct verifier_env' can be global, but since it's not small, ++ /* no program is valid */ ++ if (ARRAY_SIZE(bpf_verifier_ops) == 0) ++ return -EINVAL; ++ ++ /* 'struct bpf_verifier_env' can be global, but since it's not small, + * allocate/free it every time bpf_check() is called + */ +- env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL); ++ env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + if (!env) + return -ENOMEM; ++ log = &env->log; + ++ len = (*prog)->len; ++ env->insn_aux_data = ++ vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); ++ ret = -ENOMEM; ++ if (!env->insn_aux_data) ++ goto err_free_env; ++ for (i = 0; i < len; i++) ++ env->insn_aux_data[i].orig_idx = i; + env->prog = *prog; ++ env->ops = bpf_verifier_ops[env->prog->type]; ++ is_priv = capable(CAP_SYS_ADMIN); + + /* grab the mutex to protect few globals used by verifier */ +- 
mutex_lock(&bpf_verifier_lock); ++ if (!is_priv) ++ mutex_lock(&bpf_verifier_lock); + + if (attr->log_level || attr->log_buf || attr->log_size) { + /* user requested verbose verifier output + * and supplied buffer to store the verification trace + */ +- log_level = attr->log_level; +- log_ubuf = (char __user *) (unsigned long) attr->log_buf; +- log_size = attr->log_size; +- log_len = 0; ++ log->level = attr->log_level; ++ log->ubuf = (char __user *) (unsigned long) attr->log_buf; ++ log->len_total = attr->log_size; + + ret = -EINVAL; +- /* log_* values have to be sane */ +- if (log_size < 128 || log_size > UINT_MAX >> 8 || +- log_level == 0 || log_ubuf == NULL) +- goto free_env; +- +- ret = -ENOMEM; +- log_buf = vmalloc(log_size); +- if (!log_buf) +- goto free_env; +- } else { +- log_level = 0; +- } ++ /* log attributes have to be sane */ ++ if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || ++ !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) ++ goto err_unlock; ++ } ++ ++ env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); ++ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) ++ env->strict_alignment = true; ++ if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) ++ env->strict_alignment = false; ++ ++ env->allow_ptr_leaks = is_priv; ++ ++ if (is_priv) ++ env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; + + ret = replace_map_fd_with_map_ptr(env); + if (ret < 0) + goto skip_full_check; + +- env->explored_states = kcalloc(env->prog->len, +- sizeof(struct verifier_state_list *), ++ env->explored_states = kcalloc(state_htab_size(env), ++ sizeof(struct bpf_verifier_state_list *), + GFP_USER); + ret = -ENOMEM; + if (!env->explored_states) + goto skip_full_check; + +- ret = check_cfg(env); ++ ret = check_subprogs(env); ++ if (ret < 0) ++ goto skip_full_check; ++ ++ ret = check_btf_info(env, attr, uattr); + if (ret < 0) + goto skip_full_check; + +- env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ++ ret = check_cfg(env); ++ if (ret < 0) ++ goto skip_full_check; + + ret = do_check(env); ++ if (env->cur_state) { ++ free_verifier_state(env->cur_state, true); ++ env->cur_state = NULL; ++ } + + skip_full_check: +- while (pop_stack(env, NULL) >= 0); ++ while (!pop_stack(env, NULL, NULL)); + free_states(env); + + if (ret == 0) ++ ret = check_max_stack_depth(env); ++ ++ /* instruction rewrites happen after this point */ ++ if (is_priv) { ++ if (ret == 0) ++ opt_hard_wire_dead_code_branches(env); ++ if (ret == 0) ++ ret = opt_remove_dead_code(env); ++ if (ret == 0) ++ ret = opt_remove_nops(env); ++ } else { ++ if (ret == 0) ++ sanitize_dead_code(env); ++ } ++ ++ if (ret == 0) + /* program is valid, convert *(u32*)(ctx + off) accesses */ + ret = convert_ctx_accesses(env); + +- if (log_level && log_len >= log_size - 1) { +- BUG_ON(log_len >= log_size); +- /* verifier log exceeded user supplied buffer */ +- ret = -ENOSPC; +- /* fall through to return what was recorded */ ++ if (ret == 0) ++ ret = fixup_bpf_calls(env); ++ ++ /* do 32-bit optimization after insn patching has done so those patched ++ * insns could be handled correctly. ++ */ ++ if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) { ++ ret = opt_subreg_zext_lo32_rnd_hi32(env, attr); ++ env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? 
!ret ++ : false; + } + +- /* copy verifier log back to user space including trailing zero */ +- if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) { ++ if (ret == 0) ++ ret = fixup_call_args(env); ++ ++ env->verification_time = ktime_get_ns() - start_time; ++ print_verification_stats(env); ++ ++ if (log->level && bpf_verifier_log_full(log)) ++ ret = -ENOSPC; ++ if (log->level && !log->ubuf) { + ret = -EFAULT; +- goto free_log_buf; ++ goto err_release_maps; + } + + if (ret == 0 && env->used_map_cnt) { +@@ -2297,7 +9580,7 @@ skip_full_check: + + if (!env->prog->aux->used_maps) { + ret = -ENOMEM; +- goto free_log_buf; ++ goto err_release_maps; + } + + memcpy(env->prog->aux->used_maps, env->used_maps, +@@ -2310,17 +9593,21 @@ skip_full_check: + convert_pseudo_ld_imm64(env); + } + +-free_log_buf: +- if (log_level) +- vfree(log_buf); +-free_env: ++ if (ret == 0) ++ adjust_btf_func(env); ++ ++err_release_maps: + if (!env->prog->aux->used_maps) + /* if we didn't copy map pointers into bpf_prog_info, release +- * them now. Otherwise free_bpf_prog_info() will release them. ++ * them now. Otherwise free_used_maps() will release them. + */ + release_maps(env); + *prog = env->prog; ++err_unlock: ++ if (!is_priv) ++ mutex_unlock(&bpf_verifier_lock); ++ vfree(env->insn_aux_data); ++err_free_env: + kfree(env); +- mutex_unlock(&bpf_verifier_lock); + return ret; + } +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + /* + * Linux Socket Filter Data Structures + */ +@@ -7,16 +8,22 @@ + #include + + #include ++#include + #include + #include + #include + #include + #include + #include +-#include ++#include ++#include ++#include ++#include ++#include + +-#include ++#include + ++#include + #include + #include + +@@ -24,6 +31,11 @@ struct sk_buff; + struct sock; + struct seccomp_data; + struct bpf_prog_aux; ++struct xdp_rxq_info; ++struct xdp_buff; ++struct sock_reuseport; ++struct ctl_table; ++struct ctl_table_header; + + /* ArgX, context and stack frame pointer register positions. Note, + * Arg1, Arg2, Arg3, etc are used as argument mappings of function +@@ -40,7 +52,26 @@ struct bpf_prog_aux; + /* Additional register mappings for converted user programs. */ + #define BPF_REG_A BPF_REG_0 + #define BPF_REG_X BPF_REG_7 +-#define BPF_REG_TMP BPF_REG_8 ++#define BPF_REG_TMP BPF_REG_2 /* scratch reg */ ++#define BPF_REG_D BPF_REG_8 /* data, callee-saved */ ++#define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ ++ ++/* Kernel hidden auxiliary/helper register. */ ++#define BPF_REG_AX MAX_BPF_REG ++#define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) ++#define MAX_BPF_JIT_REG MAX_BPF_EXT_REG ++ ++/* unused opcode to mark special call to bpf_tail_call() helper */ ++#define BPF_TAIL_CALL 0xf0 ++ ++/* unused opcode to mark call to interpreter with arguments */ ++#define BPF_CALL_ARGS 0xe0 ++ ++/* As per nm, we expose JITed images as text (code) section for ++ * kallsyms. That way, tools like perf can find it to match ++ * addresses. ++ */ ++#define BPF_SYM_ELF_TYPE 't' + + /* BPF program can access up to 512 bytes of stack space. */ + #define MAX_BPF_STACK 512 +@@ -129,6 +160,20 @@ struct bpf_prog_aux; + .off = 0, \ + .imm = IMM }) + ++/* Special form of mov32, used for doing explicit zero extension on dst. 
*/ ++#define BPF_ZEXT_REG(DST) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_ALU | BPF_MOV | BPF_X, \ ++ .dst_reg = DST, \ ++ .src_reg = DST, \ ++ .off = 0, \ ++ .imm = 1 }) ++ ++static inline bool insn_is_zext(const struct bpf_insn *insn) ++{ ++ return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1; ++} ++ + /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ + #define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) +@@ -249,8 +294,51 @@ struct bpf_prog_aux; + .off = OFF, \ + .imm = IMM }) + ++/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ ++ ++#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ ++ .dst_reg = DST, \ ++ .src_reg = SRC, \ ++ .off = OFF, \ ++ .imm = 0 }) ++ ++/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ ++ ++#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = OFF, \ ++ .imm = IMM }) ++ ++/* Unconditional jumps, goto pc + off16 */ ++ ++#define BPF_JMP_A(OFF) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_JMP | BPF_JA, \ ++ .dst_reg = 0, \ ++ .src_reg = 0, \ ++ .off = OFF, \ ++ .imm = 0 }) ++ ++/* Relative call */ ++ ++#define BPF_CALL_REL(TGT) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_JMP | BPF_CALL, \ ++ .dst_reg = 0, \ ++ .src_reg = BPF_PSEUDO_CALL, \ ++ .off = 0, \ ++ .imm = TGT }) ++ + /* Function call */ + ++#define BPF_CAST_CALL(x) \ ++ ((u64 (*)(u64, u64, u64, u64, u64))(x)) ++ + #define BPF_EMIT_CALL(FUNC) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_CALL, \ +@@ -303,6 +391,112 @@ struct bpf_prog_aux; + bpf_size; \ + }) + ++#define bpf_size_to_bytes(bpf_size) \ ++({ \ ++ int bytes = -EINVAL; \ ++ \ ++ if (bpf_size == BPF_B) \ ++ bytes = sizeof(u8); \ ++ else if (bpf_size == BPF_H) \ ++ bytes = sizeof(u16); \ ++ else if (bpf_size == BPF_W) \ ++ bytes = sizeof(u32); \ ++ else if (bpf_size == BPF_DW) \ ++ bytes = sizeof(u64); \ ++ \ ++ bytes; \ ++}) ++ ++#define BPF_SIZEOF(type) \ ++ ({ \ ++ const int __size = bytes_to_bpf_size(sizeof(type)); \ ++ BUILD_BUG_ON(__size < 0); \ ++ __size; \ ++ }) ++ ++#define BPF_FIELD_SIZEOF(type, field) \ ++ ({ \ ++ const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \ ++ BUILD_BUG_ON(__size < 0); \ ++ __size; \ ++ }) ++ ++#define BPF_LDST_BYTES(insn) \ ++ ({ \ ++ const int __size = bpf_size_to_bytes(BPF_SIZE((insn)->code)); \ ++ WARN_ON(__size < 0); \ ++ __size; \ ++ }) ++ ++#define __BPF_MAP_0(m, v, ...) v ++#define __BPF_MAP_1(m, v, t, a, ...) m(t, a) ++#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) ++#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__) ++#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__) ++#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__) ++ ++#define __BPF_REG_0(...) __BPF_PAD(5) ++#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4) ++#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3) ++#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2) ++#define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1) ++#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__) ++ ++#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__) ++#define __BPF_REG(n, ...) 
__BPF_REG_##n(__VA_ARGS__) ++ ++#define __BPF_CAST(t, a) \ ++ (__force t) \ ++ (__force \ ++ typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long), \ ++ (unsigned long)0, (t)0))) a ++#define __BPF_V void ++#define __BPF_N ++ ++#define __BPF_DECL_ARGS(t, a) t a ++#define __BPF_DECL_REGS(t, a) u64 a ++ ++#define __BPF_PAD(n) \ ++ __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ ++ u64, __ur_3, u64, __ur_4, u64, __ur_5) ++ ++#define BPF_CALL_x(x, name, ...) \ ++ static __always_inline \ ++ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ ++ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ ++ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ ++ { \ ++ return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ ++ } \ ++ static __always_inline \ ++ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) ++ ++#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) ++#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) ++#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) ++#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) ++#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) ++#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) ++ ++#define bpf_ctx_range(TYPE, MEMBER) \ ++ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 ++#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ ++ offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 ++#if BITS_PER_LONG == 64 ++# define bpf_ctx_range_ptr(TYPE, MEMBER) \ ++ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 ++#else ++# define bpf_ctx_range_ptr(TYPE, MEMBER) \ ++ offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1 ++#endif /* BITS_PER_LONG == 64 */ ++ ++#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ ++ ({ \ ++ BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \ ++ *(PTR_SIZE) = (SIZE); \ ++ offsetof(TYPE, MEMBER); \ ++ }) ++ + #ifdef CONFIG_COMPAT + /* A struct sock_filter is architecture independent. */ + struct compat_sock_fprog { +@@ -317,24 +511,33 @@ struct sock_fprog_kern { + }; + + struct bpf_binary_header { +- unsigned int pages; +- u8 image[]; ++ u32 pages; ++ /* Some arches need word alignment for their instructions */ ++ u8 image[] __aligned(4); + }; + + struct bpf_prog { + u16 pages; /* Number of allocated pages */ +- kmemcheck_bitfield_begin(meta); + u16 jited:1, /* Is our filter JIT'ed? */ ++ jit_requested:1,/* archs need to JIT the prog */ ++ undo_set_mem:1, /* Passed set_memory_ro() checkpoint */ + gpl_compatible:1, /* Is filter GPL compatible? */ + cb_access:1, /* Is control block accessed? */ +- dst_needed:1; /* Do we need dst entry? */ +- kmemcheck_bitfield_end(meta); +- u32 len; /* Number of filter blocks */ ++ dst_needed:1, /* Do we need dst entry? */ ++ blinded:1, /* Was blinded */ ++ is_func:1, /* program is a bpf function */ ++ kprobe_override:1, /* Do we override a kprobe? */ ++ has_callchain_buf:1, /* callchain buffer allocated? 
*/ ++ enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */ + enum bpf_prog_type type; /* Type of BPF program */ ++ enum bpf_attach_type expected_attach_type; /* For some prog types */ ++ u32 len; /* Number of filter blocks */ ++ u32 jited_len; /* Size of jited insns in bytes */ ++ u8 tag[BPF_TAG_SIZE]; + struct bpf_prog_aux *aux; /* Auxiliary fields */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ +- unsigned int (*bpf_func)(const struct sk_buff *skb, +- const struct bpf_insn *filter); ++ unsigned int (*bpf_func)(const void *ctx, ++ const struct bpf_insn *insn); + /* Instructions for interpreter */ + union { + struct sock_filter insns[0]; +@@ -343,44 +546,160 @@ struct bpf_prog { + }; + + struct sk_filter { +- atomic_t refcnt; ++ refcount_t refcnt; + struct rcu_head rcu; + struct bpf_prog *prog; + }; + +-#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) ++#define BPF_PROG_RUN(prog, ctx) ({ \ ++ u32 ret; \ ++ ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \ ++ ret; }) ++ ++#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN ++ ++struct bpf_skb_data_end { ++ struct qdisc_skb_cb qdisc_cb; ++ void *data_meta; ++ void *data_end; ++}; + +-static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, +- struct sk_buff *skb) ++struct bpf_redirect_info { ++ u32 flags; ++ u32 tgt_index; ++ void *tgt_value; ++ struct bpf_map *map; ++ struct bpf_map *map_to_flush; ++ u32 kern_flags; ++}; ++ ++DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); ++ ++/* flags for bpf_redirect_info kern_flags */ ++#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ ++ ++/* Compute the linear packet data range [data, data_end) which ++ * will be accessed by various program types (cls_bpf, act_bpf, ++ * lwt, ...). Subsystems allowing direct data access must (!) ++ * ensure that cb[] area can be written to when BPF program is ++ * invoked (otherwise cb[] save/restore is necessary). ++ */ ++static inline void bpf_compute_data_pointers(struct sk_buff *skb) + { +- u8 *cb_data = qdisc_skb_cb(skb)->data; +- u8 saved_cb[QDISC_CB_PRIV_LEN]; +- u32 res; ++ struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; ++ ++ BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); ++ cb->data_meta = skb->data; ++ cb->data_end = skb->data + skb_headlen(skb); ++} + ++/* Similar to bpf_compute_data_pointers(), except that save orginal ++ * data in cb->data and cb->meta_data for restore. ++ */ ++static inline void bpf_compute_and_save_data_end( ++ struct sk_buff *skb, void **saved_data_end) ++{ ++ struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; ++ ++ *saved_data_end = cb->data_end; ++ cb->data_end = skb->data + skb_headlen(skb); ++} ++ ++/* Restore data saved by bpf_compute_data_pointers(). */ ++static inline void bpf_restore_data_end( ++ struct sk_buff *skb, void *saved_data_end) ++{ ++ struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; ++ ++ cb->data_end = saved_data_end; ++} ++ ++static inline u8 *bpf_skb_cb(struct sk_buff *skb) ++{ ++ /* eBPF programs may read/write skb->cb[] area to transfer meta ++ * data between tail calls. Since this also needs to work with ++ * tc, that scratch memory is mapped to qdisc_skb_cb's data area. ++ * ++ * In some socket filter cases, the cb unfortunately needs to be ++ * saved/restored so that protocol specific skb->cb[] data won't ++ * be lost. 
In any case, due to unpriviledged eBPF programs ++ * attached to sockets, we need to clear the bpf_skb_cb() area ++ * to not leak previous contents to user space. ++ */ ++ BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN); + BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != +- QDISC_CB_PRIV_LEN); ++ FIELD_SIZEOF(struct qdisc_skb_cb, data)); ++ ++ return qdisc_skb_cb(skb)->data; ++} ++ ++static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, ++ struct sk_buff *skb) ++{ ++ u8 *cb_data = bpf_skb_cb(skb); ++ u8 cb_saved[BPF_SKB_CB_LEN]; ++ u32 res; + + if (unlikely(prog->cb_access)) { +- memcpy(saved_cb, cb_data, sizeof(saved_cb)); +- memset(cb_data, 0, sizeof(saved_cb)); ++ memcpy(cb_saved, cb_data, sizeof(cb_saved)); ++ memset(cb_data, 0, sizeof(cb_saved)); + } + + res = BPF_PROG_RUN(prog, skb); + + if (unlikely(prog->cb_access)) +- memcpy(cb_data, saved_cb, sizeof(saved_cb)); ++ memcpy(cb_data, cb_saved, sizeof(cb_saved)); ++ ++ return res; ++} ++ ++static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, ++ struct sk_buff *skb) ++{ ++ u32 res; + ++ preempt_disable(); ++ res = __bpf_prog_run_save_cb(prog, skb); ++ preempt_enable(); + return res; + } + + static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, + struct sk_buff *skb) + { +- u8 *cb_data = qdisc_skb_cb(skb)->data; ++ u8 *cb_data = bpf_skb_cb(skb); ++ u32 res; + + if (unlikely(prog->cb_access)) +- memset(cb_data, 0, QDISC_CB_PRIV_LEN); +- return BPF_PROG_RUN(prog, skb); ++ memset(cb_data, 0, BPF_SKB_CB_LEN); ++ ++ preempt_disable(); ++ res = BPF_PROG_RUN(prog, skb); ++ preempt_enable(); ++ return res; ++} ++ ++static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, ++ struct xdp_buff *xdp) ++{ ++ /* Caller needs to hold rcu_read_lock() (!), otherwise program ++ * can be released while still running, or map elements could be ++ * freed early while still having concurrent users. XDP fastpath ++ * already takes rcu_read_lock() when fetching the program, so ++ * it's not necessary here anymore. 
++ */ ++ return BPF_PROG_RUN(prog, xdp); ++} ++ ++static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) ++{ ++ return prog->len * sizeof(struct bpf_insn); ++} ++ ++static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog) ++{ ++ return round_up(bpf_prog_insn_size(prog) + ++ sizeof(__be64) + 1, SHA_MESSAGE_BYTES); + } + + static inline unsigned int bpf_prog_size(unsigned int proglen) +@@ -399,27 +718,77 @@ static inline bool bpf_prog_was_classic( + return prog->type == BPF_PROG_TYPE_UNSPEC; + } + ++static inline u32 bpf_ctx_off_adjust_machine(u32 size) ++{ ++ const u32 size_machine = sizeof(unsigned long); ++ ++ if (size > size_machine && size % size_machine == 0) ++ size = size_machine; ++ ++ return size; ++} ++ ++static inline bool ++bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) ++{ ++ return size <= size_default && (size & (size - 1)) == 0; ++} ++ ++static inline u8 ++bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default) ++{ ++ u8 access_off = off & (size_default - 1); ++ ++#ifdef __LITTLE_ENDIAN ++ return access_off; ++#else ++ return size_default - (access_off + size); ++#endif ++} ++ ++#define bpf_ctx_wide_access_ok(off, size, type, field) \ ++ (size == sizeof(__u64) && \ ++ off >= offsetof(type, field) && \ ++ off + sizeof(__u64) <= offsetofend(type, field) && \ ++ off % sizeof(__u64) == 0) ++ + #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) + +-#ifdef CONFIG_DEBUG_SET_MODULE_RONX + static inline void bpf_prog_lock_ro(struct bpf_prog *fp) + { +- set_memory_ro((unsigned long)fp, fp->pages); ++#ifndef CONFIG_BPF_JIT_ALWAYS_ON ++ if (!fp->jited) { ++ fp->undo_set_mem = 1; ++ set_memory_ro((unsigned long)fp, fp->pages); ++ } ++#endif + } + + static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) + { +- set_memory_rw((unsigned long)fp, fp->pages); ++ if (fp->undo_set_mem) ++ set_memory_rw((unsigned long)fp, fp->pages); + } +-#else +-static inline void bpf_prog_lock_ro(struct bpf_prog *fp) ++ ++static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) + { ++ set_memory_ro((unsigned long)hdr, hdr->pages); ++ set_memory_x((unsigned long)hdr, hdr->pages); + } + +-static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) ++static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) ++{ ++ set_memory_rw((unsigned long)hdr, hdr->pages); ++} ++ ++static inline struct bpf_binary_header * ++bpf_jit_binary_hdr(const struct bpf_prog *fp) + { ++ unsigned long real_start = (unsigned long)fp->bpf_func; ++ unsigned long addr = real_start & PAGE_MASK; ++ ++ return (void *)addr; + } +-#endif /* CONFIG_DEBUG_SET_MODULE_RONX */ + + int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); + static inline int sk_filter(struct sock *sk, struct sk_buff *skb) +@@ -427,10 +796,20 @@ static inline int sk_filter(struct sock + return sk_filter_trim_cap(sk, skb, 1); + } + +-int bpf_prog_select_runtime(struct bpf_prog *fp); ++struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); + void bpf_prog_free(struct bpf_prog *fp); + ++bool bpf_opcode_in_insntable(u8 code); ++ ++void bpf_prog_free_linfo(struct bpf_prog *prog); ++void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, ++ const u32 *insn_to_jit_off); ++int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog); ++void bpf_prog_free_jited_linfo(struct bpf_prog *prog); ++void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog); ++ + struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); 
++struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags); + struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, + gfp_t gfp_extra_flags); + void __bpf_prog_free(struct bpf_prog *fp); +@@ -450,12 +829,11 @@ int bpf_prog_create_from_user(struct bpf + void bpf_prog_destroy(struct bpf_prog *fp); + + int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +-int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, +- bool locked); + int sk_attach_bpf(u32 ufd, struct sock *sk); ++int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); ++int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); ++void sk_reuseport_prog_free(struct bpf_prog *prog); + int sk_detach_filter(struct sock *sk); +-int __sk_detach_filter(struct sock *sk, bool locked); +- + int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, + unsigned int len); + +@@ -463,10 +841,100 @@ bool sk_filter_charge(struct sock *sk, s + void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); + + u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +-void bpf_int_jit_compile(struct bpf_prog *fp); +-bool bpf_helper_changes_skb_data(void *func); ++#define __bpf_call_base_args \ ++ ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ ++ (void *)__bpf_call_base) ++ ++struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); ++void bpf_jit_compile(struct bpf_prog *prog); ++bool bpf_jit_needs_zext(void); ++bool bpf_helper_changes_pkt_data(void *func); ++ ++static inline bool bpf_dump_raw_ok(const struct cred *cred) ++{ ++ /* Reconstruction of call-sites is dependent on kallsyms, ++ * thus make dump the same restriction. ++ */ ++ return true; ++} ++ ++struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, ++ const struct bpf_insn *patch, u32 len); ++int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); ++ ++void bpf_clear_redirect_map(struct bpf_map *map); ++ ++static inline bool xdp_return_frame_no_direct(void) ++{ ++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ++ ++ return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT; ++} ++ ++static inline void xdp_set_return_frame_no_direct(void) ++{ ++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ++ ++ ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT; ++} ++ ++static inline void xdp_clear_return_frame_no_direct(void) ++{ ++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ++ ++ ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT; ++} ++ ++static inline int xdp_ok_fwd_dev(const struct net_device *fwd, ++ unsigned int pktlen) ++{ ++ unsigned int len; ++ ++ if (unlikely(!(fwd->flags & IFF_UP))) ++ return -ENETDOWN; ++ ++ len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; ++ if (pktlen > len) ++ return -EMSGSIZE; ++ ++ return 0; ++} ++ ++/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the ++ * same cpu context. Further for best results no more than a single map ++ * for the do_redirect/do_flush pair should be used. This limitation is ++ * because we only track one map and force a flush when the map changes. ++ * This does not appear to be a real limitation for existing software. 
++ */ ++int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, ++ struct xdp_buff *xdp, struct bpf_prog *prog); ++int xdp_do_redirect(struct net_device *dev, ++ struct xdp_buff *xdp, ++ struct bpf_prog *prog); ++void xdp_do_flush_map(void); ++ ++void bpf_warn_invalid_xdp_action(u32 act); ++ ++#ifdef CONFIG_INET ++struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, ++ struct bpf_prog *prog, struct sk_buff *skb, ++ u32 hash); ++#else ++static inline struct sock * ++bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, ++ struct bpf_prog *prog, struct sk_buff *skb, ++ u32 hash) ++{ ++ return NULL; ++} ++#endif + + #ifdef CONFIG_BPF_JIT ++extern int bpf_jit_enable; ++extern int bpf_jit_harden; ++extern int bpf_jit_kallsyms; ++extern long bpf_jit_limit; ++ + typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + + struct bpf_binary_header * +@@ -474,10 +942,18 @@ bpf_jit_binary_alloc(unsigned int progle + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns); + void bpf_jit_binary_free(struct bpf_binary_header *hdr); +- +-void bpf_jit_compile(struct bpf_prog *fp); ++u64 bpf_jit_alloc_exec_limit(void); ++void *bpf_jit_alloc_exec(unsigned long size); ++void bpf_jit_free_exec(void *addr); + void bpf_jit_free(struct bpf_prog *fp); + ++int bpf_jit_get_func_addr(const struct bpf_prog *prog, ++ const struct bpf_insn *insn, bool extra_pass, ++ u64 *func_addr, bool *func_addr_fixed); ++ ++struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); ++void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); ++ + static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, + u32 pass, void *image) + { +@@ -488,17 +964,144 @@ static inline void bpf_jit_dump(unsigned + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, + 16, 1, image, proglen, false); + } +-#else +-static inline void bpf_jit_compile(struct bpf_prog *fp) ++ ++static inline bool bpf_jit_is_ebpf(void) ++{ ++# ifdef CONFIG_HAVE_EBPF_JIT ++ return true; ++# else ++ return false; ++# endif ++} ++ ++static inline bool ebpf_jit_enabled(void) ++{ ++ return bpf_jit_enable && bpf_jit_is_ebpf(); ++} ++ ++static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) ++{ ++ return fp->jited && bpf_jit_is_ebpf(); ++} ++ ++static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) ++{ ++ /* These are the prerequisites, should someone ever have the ++ * idea to call blinding outside of them, we make sure to ++ * bail out. ++ */ ++ if (!bpf_jit_is_ebpf()) ++ return false; ++ if (!prog->jit_requested) ++ return false; ++ if (!bpf_jit_harden) ++ return false; ++ if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) ++ return false; ++ ++ return true; ++} ++ ++static inline bool bpf_jit_kallsyms_enabled(void) + { ++ /* There are a couple of corner cases where kallsyms should ++ * not be enabled f.e. on hardening. 
++ */ ++ if (bpf_jit_harden) ++ return false; ++ if (!bpf_jit_kallsyms) ++ return false; ++ if (bpf_jit_kallsyms == 1) ++ return true; ++ ++ return false; ++} ++ ++const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, ++ unsigned long *off, char *sym); ++bool is_bpf_text_address(unsigned long addr); ++int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, ++ char *sym); ++ ++static inline const char * ++bpf_address_lookup(unsigned long addr, unsigned long *size, ++ unsigned long *off, char **modname, char *sym) ++{ ++ const char *ret = __bpf_address_lookup(addr, size, off, sym); ++ ++ if (ret && modname) ++ *modname = NULL; ++ return ret; ++} ++ ++void bpf_prog_kallsyms_add(struct bpf_prog *fp); ++void bpf_prog_kallsyms_del(struct bpf_prog *fp); ++void bpf_get_prog_name(const struct bpf_prog *prog, char *sym); ++ ++#else /* CONFIG_BPF_JIT */ ++ ++static inline bool ebpf_jit_enabled(void) ++{ ++ return false; ++} ++ ++static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) ++{ ++ return false; + } + + static inline void bpf_jit_free(struct bpf_prog *fp) + { + bpf_prog_unlock_free(fp); + } ++ ++static inline bool bpf_jit_kallsyms_enabled(void) ++{ ++ return false; ++} ++ ++static inline const char * ++__bpf_address_lookup(unsigned long addr, unsigned long *size, ++ unsigned long *off, char *sym) ++{ ++ return NULL; ++} ++ ++static inline bool is_bpf_text_address(unsigned long addr) ++{ ++ return false; ++} ++ ++static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, ++ char *type, char *sym) ++{ ++ return -ERANGE; ++} ++ ++static inline const char * ++bpf_address_lookup(unsigned long addr, unsigned long *size, ++ unsigned long *off, char **modname, char *sym) ++{ ++ return NULL; ++} ++ ++static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) ++{ ++} ++ ++static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) ++{ ++} ++ ++static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) ++{ ++ sym[0] = '\0'; ++} ++ + #endif /* CONFIG_BPF_JIT */ + ++void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); ++ + #define BPF_ANC BIT(15) + + static inline bool bpf_needs_clear_a(const struct sock_filter *first) +@@ -571,4 +1174,59 @@ static inline int bpf_tell_extensions(vo + return SKF_AD_MAX; + } + ++struct bpf_sock_addr_kern { ++ struct sock *sk; ++ struct sockaddr *uaddr; ++ /* Temporary "register" to make indirect stores to nested structures ++ * defined above. We need three registers to make such a store, but ++ * only two (src and dst) are available at convert_ctx_access time ++ */ ++ u64 tmp_reg; ++ void *t_ctx; /* Attach type specific context. */ ++}; ++ ++struct bpf_sock_ops_kern { ++ struct sock *sk; ++ u32 op; ++ union { ++ u32 args[4]; ++ u32 reply; ++ u32 replylong[4]; ++ }; ++ u32 is_fullsock; ++ u64 temp; /* temp and everything after is not ++ * initialized to 0 before calling ++ * the BPF program. New fields that ++ * should be initialized to 0 should ++ * be inserted before temp. ++ * temp is scratch storage used by ++ * sock_ops_convert_ctx_access ++ * as temporary storage of a register. ++ */ ++}; ++ ++struct bpf_sysctl_kern { ++ struct ctl_table_header *head; ++ struct ctl_table *table; ++ void *cur_val; ++ size_t cur_len; ++ void *new_val; ++ size_t new_len; ++ int new_updated; ++ int write; ++ loff_t *ppos; ++ /* Temporary "register" for indirect stores to ppos. 
*/ ++ u64 tmp_reg; ++}; ++ ++struct bpf_sockopt_kern { ++ struct sock *sk; ++ u8 *optval; ++ u8 *optval_end; ++ s32 level; ++ s32 optname; ++ s32 optlen; ++ s32 retval; ++}; ++ + #endif /* __LINUX_FILTER_H__ */ +--- /dev/null ++++ b/include/linux/set_memory.h +@@ -0,0 +1,47 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright 2017, Michael Ellerman, IBM Corporation. ++ */ ++#ifndef _LINUX_SET_MEMORY_H_ ++#define _LINUX_SET_MEMORY_H_ ++ ++#include ++ ++#ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP ++static inline int set_direct_map_invalid_noflush(struct page *page) ++{ ++ return 0; ++} ++static inline int set_direct_map_default_noflush(struct page *page) ++{ ++ return 0; ++} ++#endif ++ ++#ifndef set_mce_nospec ++static inline int set_mce_nospec(unsigned long pfn, bool unmap) ++{ ++ return 0; ++} ++#endif ++ ++#ifndef clear_mce_nospec ++static inline int clear_mce_nospec(unsigned long pfn) ++{ ++ return 0; ++} ++#endif ++ ++#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT ++static inline int set_memory_encrypted(unsigned long addr, int numpages) ++{ ++ return 0; ++} ++ ++static inline int set_memory_decrypted(unsigned long addr, int numpages) ++{ ++ return 0; ++} ++#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ ++ ++#endif /* _LINUX_SET_MEMORY_H_ */ +--- /dev/null ++++ b/include/trace/events/xdp.h +@@ -0,0 +1,407 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM xdp ++ ++#if !defined(_TRACE_XDP_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_XDP_H ++ ++#include ++#include ++#include ++#include ++ ++#define __XDP_ACT_MAP(FN) \ ++ FN(ABORTED) \ ++ FN(DROP) \ ++ FN(PASS) \ ++ FN(TX) \ ++ FN(REDIRECT) ++ ++#define __XDP_ACT_TP_FN(x) \ ++ TRACE_DEFINE_ENUM(XDP_##x); ++#define __XDP_ACT_SYM_FN(x) \ ++ { XDP_##x, #x }, ++#define __XDP_ACT_SYM_TAB \ ++ __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, 0 } ++__XDP_ACT_MAP(__XDP_ACT_TP_FN) ++ ++TRACE_EVENT(xdp_exception, ++ ++ TP_PROTO(const struct net_device *dev, ++ const struct bpf_prog *xdp, u32 act), ++ ++ TP_ARGS(dev, xdp, act), ++ ++ TP_STRUCT__entry( ++ __field(int, prog_id) ++ __field(u32, act) ++ __field(int, ifindex) ++ ), ++ ++ TP_fast_assign( ++ __entry->prog_id = xdp->aux->id; ++ __entry->act = act; ++ __entry->ifindex = dev->ifindex; ++ ), ++ ++ TP_printk("prog_id=%d action=%s ifindex=%d", ++ __entry->prog_id, ++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), ++ __entry->ifindex) ++); ++ ++TRACE_EVENT(xdp_bulk_tx, ++ ++ TP_PROTO(const struct net_device *dev, ++ int sent, int drops, int err), ++ ++ TP_ARGS(dev, sent, drops, err), ++ ++ TP_STRUCT__entry( ++ __field(int, ifindex) ++ __field(u32, act) ++ __field(int, drops) ++ __field(int, sent) ++ __field(int, err) ++ ), ++ ++ TP_fast_assign( ++ __entry->ifindex = dev->ifindex; ++ __entry->act = XDP_TX; ++ __entry->drops = drops; ++ __entry->sent = sent; ++ __entry->err = err; ++ ), ++ ++ TP_printk("ifindex=%d action=%s sent=%d drops=%d err=%d", ++ __entry->ifindex, ++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), ++ __entry->sent, __entry->drops, __entry->err) ++); ++ ++DECLARE_EVENT_CLASS(xdp_redirect_template, ++ ++ TP_PROTO(const struct net_device *dev, ++ const struct bpf_prog *xdp, ++ int to_ifindex, int err, ++ const struct bpf_map *map, u32 map_index), ++ ++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), ++ ++ TP_STRUCT__entry( ++ __field(int, prog_id) ++ __field(u32, act) ++ __field(int, ifindex) ++ __field(int, err) ++ __field(int, to_ifindex) ++ __field(u32, map_id) ++ __field(int, map_index) ++ ), ++ ++ TP_fast_assign( ++ 
__entry->prog_id = xdp->aux->id; ++ __entry->act = XDP_REDIRECT; ++ __entry->ifindex = dev->ifindex; ++ __entry->err = err; ++ __entry->to_ifindex = to_ifindex; ++ __entry->map_id = map ? map->id : 0; ++ __entry->map_index = map_index; ++ ), ++ ++ TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d", ++ __entry->prog_id, ++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), ++ __entry->ifindex, __entry->to_ifindex, ++ __entry->err) ++); ++ ++DEFINE_EVENT(xdp_redirect_template, xdp_redirect, ++ TP_PROTO(const struct net_device *dev, ++ const struct bpf_prog *xdp, ++ int to_ifindex, int err, ++ const struct bpf_map *map, u32 map_index), ++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) ++); ++ ++DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, ++ TP_PROTO(const struct net_device *dev, ++ const struct bpf_prog *xdp, ++ int to_ifindex, int err, ++ const struct bpf_map *map, u32 map_index), ++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) ++); ++ ++#define _trace_xdp_redirect(dev, xdp, to) \ ++ trace_xdp_redirect(dev, xdp, to, 0, NULL, 0); ++ ++#define _trace_xdp_redirect_err(dev, xdp, to, err) \ ++ trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0); ++ ++DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map, ++ TP_PROTO(const struct net_device *dev, ++ const struct bpf_prog *xdp, ++ int to_ifindex, int err, ++ const struct bpf_map *map, u32 map_index), ++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), ++ TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" ++ " map_id=%d map_index=%d", ++ __entry->prog_id, ++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), ++ __entry->ifindex, __entry->to_ifindex, ++ __entry->err, ++ __entry->map_id, __entry->map_index) ++); ++ ++DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, ++ TP_PROTO(const struct net_device *dev, ++ const struct bpf_prog *xdp, ++ int to_ifindex, int err, ++ const struct bpf_map *map, u32 map_index), ++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), ++ TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" ++ " map_id=%d map_index=%d", ++ __entry->prog_id, ++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), ++ __entry->ifindex, __entry->to_ifindex, ++ __entry->err, ++ __entry->map_id, __entry->map_index) ++); ++ ++#ifndef __DEVMAP_OBJ_TYPE ++#define __DEVMAP_OBJ_TYPE ++struct _bpf_dtab_netdev { ++ struct net_device *dev; ++}; ++#endif /* __DEVMAP_OBJ_TYPE */ ++ ++#define devmap_ifindex(fwd, map) \ ++ ((map->map_type == BPF_MAP_TYPE_DEVMAP || \ ++ map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ? 
\ ++ ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0) ++ ++#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ ++ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ ++ 0, map, idx) ++ ++#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ ++ trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \ ++ err, map, idx) ++ ++TRACE_EVENT(xdp_cpumap_kthread, ++ ++ TP_PROTO(int map_id, unsigned int processed, unsigned int drops, ++ int sched), ++ ++ TP_ARGS(map_id, processed, drops, sched), ++ ++ TP_STRUCT__entry( ++ __field(int, map_id) ++ __field(u32, act) ++ __field(int, cpu) ++ __field(unsigned int, drops) ++ __field(unsigned int, processed) ++ __field(int, sched) ++ ), ++ ++ TP_fast_assign( ++ __entry->map_id = map_id; ++ __entry->act = XDP_REDIRECT; ++ __entry->cpu = smp_processor_id(); ++ __entry->drops = drops; ++ __entry->processed = processed; ++ __entry->sched = sched; ++ ), ++ ++ TP_printk("kthread" ++ " cpu=%d map_id=%d action=%s" ++ " processed=%u drops=%u" ++ " sched=%d", ++ __entry->cpu, __entry->map_id, ++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), ++ __entry->processed, __entry->drops, ++ __entry->sched) ++); ++ ++TRACE_EVENT(xdp_cpumap_enqueue, ++ ++ TP_PROTO(int map_id, unsigned int processed, unsigned int drops, ++ int to_cpu), ++ ++ TP_ARGS(map_id, processed, drops, to_cpu), ++ ++ TP_STRUCT__entry( ++ __field(int, map_id) ++ __field(u32, act) ++ __field(int, cpu) ++ __field(unsigned int, drops) ++ __field(unsigned int, processed) ++ __field(int, to_cpu) ++ ), ++ ++ TP_fast_assign( ++ __entry->map_id = map_id; ++ __entry->act = XDP_REDIRECT; ++ __entry->cpu = smp_processor_id(); ++ __entry->drops = drops; ++ __entry->processed = processed; ++ __entry->to_cpu = to_cpu; ++ ), ++ ++ TP_printk("enqueue" ++ " cpu=%d map_id=%d action=%s" ++ " processed=%u drops=%u" ++ " to_cpu=%d", ++ __entry->cpu, __entry->map_id, ++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), ++ __entry->processed, __entry->drops, ++ __entry->to_cpu) ++); ++ ++TRACE_EVENT(xdp_devmap_xmit, ++ ++ TP_PROTO(const struct bpf_map *map, u32 map_index, ++ int sent, int drops, ++ const struct net_device *from_dev, ++ const struct net_device *to_dev, int err), ++ ++ TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err), ++ ++ TP_STRUCT__entry( ++ __field(int, map_id) ++ __field(u32, act) ++ __field(u32, map_index) ++ __field(int, drops) ++ __field(int, sent) ++ __field(int, from_ifindex) ++ __field(int, to_ifindex) ++ __field(int, err) ++ ), ++ ++ TP_fast_assign( ++ __entry->map_id = map->id; ++ __entry->act = XDP_REDIRECT; ++ __entry->map_index = map_index; ++ __entry->drops = drops; ++ __entry->sent = sent; ++ __entry->from_ifindex = from_dev->ifindex; ++ __entry->to_ifindex = to_dev->ifindex; ++ __entry->err = err; ++ ), ++ ++ TP_printk("ndo_xdp_xmit" ++ " map_id=%d map_index=%d action=%s" ++ " sent=%d drops=%d" ++ " from_ifindex=%d to_ifindex=%d err=%d", ++ __entry->map_id, __entry->map_index, ++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), ++ __entry->sent, __entry->drops, ++ __entry->from_ifindex, __entry->to_ifindex, __entry->err) ++); ++ ++/* Expect users already include , but not xdp_priv.h */ ++#include ++ ++#define __MEM_TYPE_MAP(FN) \ ++ FN(PAGE_SHARED) \ ++ FN(PAGE_ORDER0) \ ++ FN(PAGE_POOL) \ ++ FN(ZERO_COPY) ++ ++#define __MEM_TYPE_TP_FN(x) \ ++ TRACE_DEFINE_ENUM(MEM_TYPE_##x); ++#define __MEM_TYPE_SYM_FN(x) \ ++ { MEM_TYPE_##x, #x }, ++#define __MEM_TYPE_SYM_TAB \ ++ __MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 } 
++__MEM_TYPE_MAP(__MEM_TYPE_TP_FN) ++ ++TRACE_EVENT(mem_disconnect, ++ ++ TP_PROTO(const struct xdp_mem_allocator *xa), ++ ++ TP_ARGS(xa), ++ ++ TP_STRUCT__entry( ++ __field(const struct xdp_mem_allocator *, xa) ++ __field(u32, mem_id) ++ __field(u32, mem_type) ++ __field(const void *, allocator) ++ ), ++ ++ TP_fast_assign( ++ __entry->xa = xa; ++ __entry->mem_id = xa->mem.id; ++ __entry->mem_type = xa->mem.type; ++ __entry->allocator = xa->allocator; ++ ), ++ ++ TP_printk("mem_id=%d mem_type=%s allocator=%p", ++ __entry->mem_id, ++ __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), ++ __entry->allocator ++ ) ++); ++ ++TRACE_EVENT(mem_connect, ++ ++ TP_PROTO(const struct xdp_mem_allocator *xa, ++ const struct xdp_rxq_info *rxq), ++ ++ TP_ARGS(xa, rxq), ++ ++ TP_STRUCT__entry( ++ __field(const struct xdp_mem_allocator *, xa) ++ __field(u32, mem_id) ++ __field(u32, mem_type) ++ __field(const void *, allocator) ++ __field(const struct xdp_rxq_info *, rxq) ++ __field(int, ifindex) ++ ), ++ ++ TP_fast_assign( ++ __entry->xa = xa; ++ __entry->mem_id = xa->mem.id; ++ __entry->mem_type = xa->mem.type; ++ __entry->allocator = xa->allocator; ++ __entry->rxq = rxq; ++ __entry->ifindex = rxq->dev->ifindex; ++ ), ++ ++ TP_printk("mem_id=%d mem_type=%s allocator=%p" ++ " ifindex=%d", ++ __entry->mem_id, ++ __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), ++ __entry->allocator, ++ __entry->ifindex ++ ) ++); ++ ++TRACE_EVENT(mem_return_failed, ++ ++ TP_PROTO(const struct xdp_mem_info *mem, ++ const struct page *page), ++ ++ TP_ARGS(mem, page), ++ ++ TP_STRUCT__entry( ++ __field(const struct page *, page) ++ __field(u32, mem_id) ++ __field(u32, mem_type) ++ ), ++ ++ TP_fast_assign( ++ __entry->page = page; ++ __entry->mem_id = mem->id; ++ __entry->mem_type = mem->type; ++ ), ++ ++ TP_printk("mem_id=%d mem_type=%s page=%p", ++ __entry->mem_id, ++ __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), ++ __entry->page ++ ) ++); ++ ++#endif /* _TRACE_XDP_H */ ++ ++#include +--- /dev/null ++++ b/include/net/xdp_priv.h +@@ -0,0 +1,20 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __LINUX_NET_XDP_PRIV_H__ ++#define __LINUX_NET_XDP_PRIV_H__ ++ ++#include ++#include ++ ++/* Private to net/core/xdp.c, but used by trace/events/xdp.h */ ++struct xdp_mem_allocator { ++ struct xdp_mem_info mem; ++ union { ++ void *allocator; ++ struct page_pool *page_pool; ++ struct zero_copy_allocator *zc_alloc; ++ }; ++ struct rhash_head node; ++ struct rcu_head rcu; ++}; ++ ++#endif /* __LINUX_NET_XDP_PRIV_H__ */ +--- /dev/null ++++ b/include/net/xdp.h +@@ -0,0 +1,184 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* include/net/xdp.h ++ * ++ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. ++ */ ++#ifndef __LINUX_NET_XDP_H__ ++#define __LINUX_NET_XDP_H__ ++ ++/** ++ * DOC: XDP RX-queue information ++ * ++ * The XDP RX-queue info (xdp_rxq_info) is associated with the driver ++ * level RX-ring queues. It is information that is specific to how ++ * the driver have configured a given RX-ring queue. ++ * ++ * Each xdp_buff frame received in the driver carry a (pointer) ++ * reference to this xdp_rxq_info structure. This provides the XDP ++ * data-path read-access to RX-info for both kernel and bpf-side ++ * (limited subset). ++ * ++ * For now, direct access is only safe while running in NAPI/softirq ++ * context. Contents is read-mostly and must not be updated during ++ * driver NAPI/softirq poll. ++ * ++ * The driver usage API is a register and unregister API. 
++ * ++ * The struct is not directly tied to the XDP prog. A new XDP prog ++ * can be attached as long as it doesn't change the underlying ++ * RX-ring. If the RX-ring does change significantly, the NIC driver ++ * naturally need to stop the RX-ring before purging and reallocating ++ * memory. In that process the driver MUST call unregistor (which ++ * also apply for driver shutdown and unload). The register API is ++ * also mandatory during RX-ring setup. ++ */ ++ ++enum xdp_mem_type { ++ MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */ ++ MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */ ++ MEM_TYPE_PAGE_POOL, ++ MEM_TYPE_ZERO_COPY, ++ MEM_TYPE_MAX, ++}; ++ ++/* XDP flags for ndo_xdp_xmit */ ++#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ ++#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH ++ ++struct xdp_mem_info { ++ u32 type; /* enum xdp_mem_type, but known size type */ ++ u32 id; ++}; ++ ++struct page_pool; ++ ++struct zero_copy_allocator { ++ void (*free)(struct zero_copy_allocator *zca, unsigned long handle); ++}; ++ ++struct xdp_rxq_info { ++ struct net_device *dev; ++ u32 queue_index; ++ u32 reg_state; ++ struct xdp_mem_info mem; ++} ____cacheline_aligned; /* perf critical, avoid false-sharing */ ++ ++struct xdp_buff { ++ void *data; ++ void *data_end; ++ void *data_meta; ++ void *data_hard_start; ++ unsigned long handle; ++ struct xdp_rxq_info *rxq; ++}; ++ ++struct xdp_frame { ++ void *data; ++ u16 len; ++ u16 headroom; ++ u16 metasize; ++ /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time, ++ * while mem info is valid on remote CPU. ++ */ ++ struct xdp_mem_info mem; ++ struct net_device *dev_rx; /* used by cpumap */ ++}; ++ ++/* Clear kernel pointers in xdp_frame */ ++static inline void xdp_scrub_frame(struct xdp_frame *frame) ++{ ++ frame->data = NULL; ++ frame->dev_rx = NULL; ++} ++ ++struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); ++ ++/* Convert xdp_buff to xdp_frame */ ++static inline ++struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) ++{ ++ struct xdp_frame *xdp_frame; ++ int metasize; ++ int headroom; ++ ++ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ++ return xdp_convert_zc_to_xdp_frame(xdp); ++ ++ /* Assure headroom is available for storing info */ ++ headroom = xdp->data - xdp->data_hard_start; ++ metasize = xdp->data - xdp->data_meta; ++ metasize = metasize > 0 ? metasize : 0; ++ if (unlikely((headroom - metasize) < sizeof(*xdp_frame))) ++ return NULL; ++ ++ /* Store info in top of packet */ ++ xdp_frame = xdp->data_hard_start; ++ ++ xdp_frame->data = xdp->data; ++ xdp_frame->len = xdp->data_end - xdp->data; ++ xdp_frame->headroom = headroom - sizeof(*xdp_frame); ++ xdp_frame->metasize = metasize; ++ ++ /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ ++ xdp_frame->mem = xdp->rxq->mem; ++ ++ return xdp_frame; ++} ++ ++void xdp_return_frame(struct xdp_frame *xdpf); ++void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); ++void xdp_return_buff(struct xdp_buff *xdp); ++ ++/* When sending xdp_frame into the network stack, then there is no ++ * return point callback, which is needed to release e.g. DMA-mapping ++ * resources with page_pool. Thus, have explicit function to release ++ * frame resources. 
++ */ ++void __xdp_release_frame(void *data, struct xdp_mem_info *mem); ++static inline void xdp_release_frame(struct xdp_frame *xdpf) ++{ ++ struct xdp_mem_info *mem = &xdpf->mem; ++ ++ /* Curr only page_pool needs this */ ++ if (mem->type == MEM_TYPE_PAGE_POOL) ++ __xdp_release_frame(xdpf->data, mem); ++} ++ ++int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, ++ struct net_device *dev, u32 queue_index); ++void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); ++void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); ++bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); ++int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, ++ enum xdp_mem_type type, void *allocator); ++void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); ++ ++/* Drivers not supporting XDP metadata can use this helper, which ++ * rejects any room expansion for metadata as a result. ++ */ ++static __always_inline void ++xdp_set_data_meta_invalid(struct xdp_buff *xdp) ++{ ++ xdp->data_meta = xdp->data + 1; ++} ++ ++static __always_inline bool ++xdp_data_meta_unsupported(const struct xdp_buff *xdp) ++{ ++ return unlikely(xdp->data_meta > xdp->data); ++} ++ ++struct xdp_attachment_info { ++ struct bpf_prog *prog; ++ u32 flags; ++}; ++ ++struct netdev_bpf; ++int xdp_attachment_query(struct xdp_attachment_info *info, ++ struct netdev_bpf *bpf); ++bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, ++ struct netdev_bpf *bpf); ++void xdp_attachment_setup(struct xdp_attachment_info *info, ++ struct netdev_bpf *bpf); ++ ++#endif /* __LINUX_NET_XDP_H__ */ +--- a/include/linux/atomic.h ++++ b/include/linux/atomic.h +@@ -437,6 +437,8 @@ static inline int atomic_add_unless(atom + return __atomic_add_unless(v, a, u) != u; + } + ++#define atomic_fetch_add_unless __atomic_add_unless ++ + /** + * atomic_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic_t +--- a/include/linux/kernel.h ++++ b/include/linux/kernel.h +@@ -45,6 +45,13 @@ + + #define STACK_MAGIC 0xdeadbeef + ++#define u64_to_user_ptr(x) ( \ ++ { \ ++ typecheck(u64, (x)); \ ++ (void __user *)(uintptr_t)(x); \ ++ } \ ++) ++ + #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + + #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +--- /dev/null ++++ b/include/linux/tnum.h +@@ -0,0 +1,89 @@ ++/* tnum: tracked (or tristate) numbers ++ * ++ * A tnum tracks knowledge about the bits of a value. Each bit can be either ++ * known (0 or 1), or unknown (x). Arithmetic operations on tnums will ++ * propagate the unknown bits such that the tnum result represents all the ++ * possible results for possible values of the operands. ++ */ ++ ++#ifndef _LINUX_TNUM_H ++#define _LINUX_TNUM_H ++ ++#include ++ ++struct tnum { ++ u64 value; ++ u64 mask; ++}; ++ ++/* Constructors */ ++/* Represent a known constant as a tnum. 
*/ ++struct tnum tnum_const(u64 value); ++/* A completely unknown value */ ++extern const struct tnum tnum_unknown; ++/* A value that's unknown except that @min <= value <= @max */ ++struct tnum tnum_range(u64 min, u64 max); ++ ++/* Arithmetic and logical ops */ ++/* Shift a tnum left (by a fixed shift) */ ++struct tnum tnum_lshift(struct tnum a, u8 shift); ++/* Shift (rsh) a tnum right (by a fixed shift) */ ++struct tnum tnum_rshift(struct tnum a, u8 shift); ++/* Shift (arsh) a tnum right (by a fixed min_shift) */ ++struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness); ++/* Add two tnums, return @a + @b */ ++struct tnum tnum_add(struct tnum a, struct tnum b); ++/* Subtract two tnums, return @a - @b */ ++struct tnum tnum_sub(struct tnum a, struct tnum b); ++/* Bitwise-AND, return @a & @b */ ++struct tnum tnum_and(struct tnum a, struct tnum b); ++/* Bitwise-OR, return @a | @b */ ++struct tnum tnum_or(struct tnum a, struct tnum b); ++/* Bitwise-XOR, return @a ^ @b */ ++struct tnum tnum_xor(struct tnum a, struct tnum b); ++/* Multiply two tnums, return @a * @b */ ++struct tnum tnum_mul(struct tnum a, struct tnum b); ++ ++/* Return a tnum representing numbers satisfying both @a and @b */ ++struct tnum tnum_intersect(struct tnum a, struct tnum b); ++ ++/* Return @a with all but the lowest @size bytes cleared */ ++struct tnum tnum_cast(struct tnum a, u8 size); ++ ++/* Returns true if @a is a known constant */ ++static inline bool tnum_is_const(struct tnum a) ++{ ++ return !a.mask; ++} ++ ++/* Returns true if @a == tnum_const(@b) */ ++static inline bool tnum_equals_const(struct tnum a, u64 b) ++{ ++ return tnum_is_const(a) && a.value == b; ++} ++ ++/* Returns true if @a is completely unknown */ ++static inline bool tnum_is_unknown(struct tnum a) ++{ ++ return !~a.mask; ++} ++ ++/* Returns true if @a is known to be a multiple of @size. ++ * @size must be a power of two. ++ */ ++bool tnum_is_aligned(struct tnum a, u64 size); ++ ++/* Returns true if @b represents a subset of @a. */ ++bool tnum_in(struct tnum a, struct tnum b); ++ ++/* Formatting functions. These have snprintf-like semantics: they will write ++ * up to @size bytes (including the terminating NUL byte), and return the number ++ * of bytes (excluding the terminating NUL) which would have been written had ++ * sufficient space been available. (Thus tnum_sbin always returns 64.) ++ */ ++/* Format a tnum as a pair of hex numbers (value; mask) */ ++int tnum_strn(char *str, size_t size, struct tnum a); ++/* Format a tnum as tristate binary expansion */ ++int tnum_sbin(char *str, size_t size, struct tnum a); ++ ++#endif /* _LINUX_TNUM_H */ +--- a/include/linux/bitmap.h ++++ b/include/linux/bitmap.h +@@ -326,6 +326,24 @@ static inline int bitmap_parse(const cha + return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); + } + ++/** ++ * bitmap_from_u64 - Check and swap words within u64. ++ * @mask: source bitmap ++ * @dst: destination bitmap ++ * ++ * In 32-bit Big Endian kernel, when using ``(u32 *)(&val)[*]`` ++ * to read u64 mask, we will get the wrong word. ++ * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, ++ * but we expect the lower 32-bits of u64. 
++ */ ++static inline void bitmap_from_u64(unsigned long *dst, u64 mask) ++{ ++ dst[0] = mask & ULONG_MAX; ++ ++ if (sizeof(mask) > sizeof(unsigned long)) ++ dst[1] = mask >> 32; ++} ++ + #endif /* __ASSEMBLY__ */ + + #endif /* __LINUX_BITMAP_H */ +--- /dev/null ++++ b/include/linux/overflow.h +@@ -0,0 +1,320 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */ ++#ifndef __LINUX_OVERFLOW_H ++#define __LINUX_OVERFLOW_H ++ ++#include ++#include ++ ++/* ++ * In the fallback code below, we need to compute the minimum and ++ * maximum values representable in a given type. These macros may also ++ * be useful elsewhere, so we provide them outside the ++ * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. ++ * ++ * It would seem more obvious to do something like ++ * ++ * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) ++ * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) ++ * ++ * Unfortunately, the middle expressions, strictly speaking, have ++ * undefined behaviour, and at least some versions of gcc warn about ++ * the type_max expression (but not if -fsanitize=undefined is in ++ * effect; in that case, the warning is deferred to runtime...). ++ * ++ * The slightly excessive casting in type_min is to make sure the ++ * macros also produce sensible values for the exotic type _Bool. [The ++ * overflow checkers only almost work for _Bool, but that's ++ * a-feature-not-a-bug, since people shouldn't be doing arithmetic on ++ * _Bools. Besides, the gcc builtins don't allow _Bool* as third ++ * argument.] ++ * ++ * Idea stolen from ++ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - ++ * credit to Christian Biere. ++ */ ++#define is_signed_type(type) (((type)(-1)) < (type)1) ++#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) ++#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) ++#define type_min(T) ((T)((T)-type_max(T)-(T)1)) ++ ++/* ++ * Avoids triggering -Wtype-limits compilation warning, ++ * while using unsigned data types to check a < 0. ++ */ ++#define is_non_negative(a) ((a) > 0 || (a) == 0) ++#define is_negative(a) (!(is_non_negative(a))) ++ ++#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW ++/* ++ * For simplicity and code hygiene, the fallback code below insists on ++ * a, b and *d having the same type (similar to the min() and max() ++ * macros), whereas gcc's type-generic overflow checkers accept ++ * different types. Hence we don't just make check_add_overflow an ++ * alias for __builtin_add_overflow, but add type checks similar to ++ * below. ++ */ ++#define check_add_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ __builtin_add_overflow(__a, __b, __d); \ ++}) ++ ++#define check_sub_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ __builtin_sub_overflow(__a, __b, __d); \ ++}) ++ ++#define check_mul_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ __builtin_mul_overflow(__a, __b, __d); \ ++}) ++ ++#else ++ ++ ++/* Checking for unsigned overflow is relatively easy without causing UB. 
*/ ++#define __unsigned_add_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ *__d = __a + __b; \ ++ *__d < __a; \ ++}) ++#define __unsigned_sub_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ *__d = __a - __b; \ ++ __a < __b; \ ++}) ++/* ++ * If one of a or b is a compile-time constant, this avoids a division. ++ */ ++#define __unsigned_mul_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ *__d = __a * __b; \ ++ __builtin_constant_p(__b) ? \ ++ __b > 0 && __a > type_max(typeof(__a)) / __b : \ ++ __a > 0 && __b > type_max(typeof(__b)) / __a; \ ++}) ++ ++/* ++ * For signed types, detecting overflow is much harder, especially if ++ * we want to avoid UB. But the interface of these macros is such that ++ * we must provide a result in *d, and in fact we must produce the ++ * result promised by gcc's builtins, which is simply the possibly ++ * wrapped-around value. Fortunately, we can just formally do the ++ * operations in the widest relevant unsigned type (u64) and then ++ * truncate the result - gcc is smart enough to generate the same code ++ * with and without the (u64) casts. ++ */ ++ ++/* ++ * Adding two signed integers can overflow only if they have the same ++ * sign, and overflow has happened iff the result has the opposite ++ * sign. ++ */ ++#define __signed_add_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ *__d = (u64)__a + (u64)__b; \ ++ (((~(__a ^ __b)) & (*__d ^ __a)) \ ++ & type_min(typeof(__a))) != 0; \ ++}) ++ ++/* ++ * Subtraction is similar, except that overflow can now happen only ++ * when the signs are opposite. In this case, overflow has happened if ++ * the result has the opposite sign of a. ++ */ ++#define __signed_sub_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ *__d = (u64)__a - (u64)__b; \ ++ ((((__a ^ __b)) & (*__d ^ __a)) \ ++ & type_min(typeof(__a))) != 0; \ ++}) ++ ++/* ++ * Signed multiplication is rather hard. gcc always follows C99, so ++ * division is truncated towards 0. This means that we can write the ++ * overflow check like this: ++ * ++ * (a > 0 && (b > MAX/a || b < MIN/a)) || ++ * (a < -1 && (b > MIN/a || b < MAX/a) || ++ * (a == -1 && b == MIN) ++ * ++ * The redundant casts of -1 are to silence an annoying -Wtype-limits ++ * (included in -Wextra) warning: When the type is u8 or u16, the ++ * __b_c_e in check_mul_overflow obviously selects ++ * __unsigned_mul_overflow, but unfortunately gcc still parses this ++ * code and warns about the limited range of __b. 
++ */ ++ ++#define __signed_mul_overflow(a, b, d) ({ \ ++ typeof(a) __a = (a); \ ++ typeof(b) __b = (b); \ ++ typeof(d) __d = (d); \ ++ typeof(a) __tmax = type_max(typeof(a)); \ ++ typeof(a) __tmin = type_min(typeof(a)); \ ++ (void) (&__a == &__b); \ ++ (void) (&__a == __d); \ ++ *__d = (u64)__a * (u64)__b; \ ++ (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ ++ (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ ++ (__b == (typeof(__b))-1 && __a == __tmin); \ ++}) ++ ++ ++#define check_add_overflow(a, b, d) \ ++ __builtin_choose_expr(is_signed_type(typeof(a)), \ ++ __signed_add_overflow(a, b, d), \ ++ __unsigned_add_overflow(a, b, d)) ++ ++#define check_sub_overflow(a, b, d) \ ++ __builtin_choose_expr(is_signed_type(typeof(a)), \ ++ __signed_sub_overflow(a, b, d), \ ++ __unsigned_sub_overflow(a, b, d)) ++ ++#define check_mul_overflow(a, b, d) \ ++ __builtin_choose_expr(is_signed_type(typeof(a)), \ ++ __signed_mul_overflow(a, b, d), \ ++ __unsigned_mul_overflow(a, b, d)) ++ ++ ++#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ ++ ++/** check_shl_overflow() - Calculate a left-shifted value and check overflow ++ * ++ * @a: Value to be shifted ++ * @s: How many bits left to shift ++ * @d: Pointer to where to store the result ++ * ++ * Computes *@d = (@a << @s) ++ * ++ * Returns true if '*d' cannot hold the result or when 'a << s' doesn't ++ * make sense. Example conditions: ++ * - 'a << s' causes bits to be lost when stored in *d. ++ * - 's' is garbage (e.g. negative) or so large that the result of ++ * 'a << s' is guaranteed to be 0. ++ * - 'a' is negative. ++ * - 'a << s' sets the sign bit, if any, in '*d'. ++ * ++ * '*d' will hold the results of the attempted shift, but is not ++ * considered "safe for use" if false is returned. ++ */ ++#define check_shl_overflow(a, s, d) ({ \ ++ typeof(a) _a = a; \ ++ typeof(s) _s = s; \ ++ typeof(d) _d = d; \ ++ u64 _a_full = _a; \ ++ unsigned int _to_shift = \ ++ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ ++ *_d = (_a_full << _to_shift); \ ++ (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \ ++ (*_d >> _to_shift) != _a); \ ++}) ++ ++/** ++ * array_size() - Calculate size of 2-dimensional array. ++ * ++ * @a: dimension one ++ * @b: dimension two ++ * ++ * Calculates size of 2-dimensional array: @a * @b. ++ * ++ * Returns: number of bytes needed to represent the array or SIZE_MAX on ++ * overflow. ++ */ ++static inline __must_check size_t array_size(size_t a, size_t b) ++{ ++ size_t bytes; ++ ++ if (check_mul_overflow(a, b, &bytes)) ++ return SIZE_MAX; ++ ++ return bytes; ++} ++ ++/** ++ * array3_size() - Calculate size of 3-dimensional array. ++ * ++ * @a: dimension one ++ * @b: dimension two ++ * @c: dimension three ++ * ++ * Calculates size of 3-dimensional array: @a * @b * @c. ++ * ++ * Returns: number of bytes needed to represent the array or SIZE_MAX on ++ * overflow. ++ */ ++static inline __must_check size_t array3_size(size_t a, size_t b, size_t c) ++{ ++ size_t bytes; ++ ++ if (check_mul_overflow(a, b, &bytes)) ++ return SIZE_MAX; ++ if (check_mul_overflow(bytes, c, &bytes)) ++ return SIZE_MAX; ++ ++ return bytes; ++} ++ ++/* ++ * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for ++ * struct_size() below. 
++ */ ++static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) ++{ ++ size_t bytes; ++ ++ if (check_mul_overflow(a, b, &bytes)) ++ return SIZE_MAX; ++ if (check_add_overflow(bytes, c, &bytes)) ++ return SIZE_MAX; ++ ++ return bytes; ++} ++ ++/** ++ * struct_size() - Calculate size of structure with trailing array. ++ * @p: Pointer to the structure. ++ * @member: Name of the array member. ++ * @n: Number of elements in the array. ++ * ++ * Calculates size of memory needed for structure @p followed by an ++ * array of @n @member elements. ++ * ++ * Return: number of bytes needed or SIZE_MAX on overflow. ++ */ ++#define struct_size(p, member, n) \ ++ __ab_c_size(n, \ ++ sizeof(*(p)->member) + __must_be_array((p)->member),\ ++ sizeof(*(p))) ++ ++#endif /* __LINUX_OVERFLOW_H */ +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1,3 +1,4 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later + /* + * Linux Socket Filter - Kernel level socket filtering + * +@@ -12,11 +13,6 @@ + * Alexei Starovoitov + * Daniel Borkmann + * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. +- * + * Andi Kleen - Fix a few bad bugs and races. + * Kris Katterjohn - Added many additional checks in bpf_check_classic() + */ +@@ -26,11 +22,14 @@ + #include + #include + #include ++#include + #include + #include + #include + #include ++#include + #include ++#include + #include + #include + #include +@@ -39,17 +38,32 @@ + #include + #include + #include +-#include ++#include + #include ++#include + #include + #include + #include + #include + #include + #include +-#include + #include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + + /** + * sk_filter_trim_cap - run a packet through a socket filter +@@ -84,7 +98,12 @@ int sk_filter_trim_cap(struct sock *sk, + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); + if (filter) { +- unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); ++ struct sock *save_sk = skb->sk; ++ unsigned int pkt_len; ++ ++ skb->sk = sk; ++ pkt_len = bpf_prog_run_save_cb(filter->prog, skb); ++ skb->sk = save_sk; + err = pkt_len ? 
pskb_trim(skb, max(cap, pkt_len)) : -EPERM; + } + rcu_read_unlock(); +@@ -93,14 +112,13 @@ int sk_filter_trim_cap(struct sock *sk, + } + EXPORT_SYMBOL(sk_filter_trim_cap); + +-static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) ++BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb) + { +- return skb_get_poff((struct sk_buff *)(unsigned long) ctx); ++ return skb_get_poff(skb); + } + +-static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) ++BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) + { +- struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) +@@ -119,9 +137,8 @@ static u64 __skb_get_nlattr(u64 ctx, u64 + return 0; + } + +-static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) ++BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) + { +- struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) +@@ -144,11 +161,98 @@ static u64 __skb_get_nlattr_nest(u64 ctx + return 0; + } + +-static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) ++BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *, ++ data, int, headlen, int, offset) ++{ ++ u8 tmp, *ptr; ++ const int len = sizeof(tmp); ++ ++ if (offset >= 0) { ++ if (headlen - offset >= len) ++ return *(u8 *)(data + offset); ++ if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) ++ return tmp; ++ } else { ++ ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); ++ if (likely(ptr)) ++ return *(u8 *)ptr; ++ } ++ ++ return -EFAULT; ++} ++ ++BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, ++ int, offset) ++{ ++ return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len, ++ offset); ++} ++ ++BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, ++ data, int, headlen, int, offset) ++{ ++ u16 tmp, *ptr; ++ const int len = sizeof(tmp); ++ ++ if (offset >= 0) { ++ if (headlen - offset >= len) ++ return get_unaligned_be16(data + offset); ++ if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) ++ return be16_to_cpu(tmp); ++ } else { ++ ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); ++ if (likely(ptr)) ++ return get_unaligned_be16(ptr); ++ } ++ ++ return -EFAULT; ++} ++ ++BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, ++ int, offset) ++{ ++ return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len, ++ offset); ++} ++ ++BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, ++ data, int, headlen, int, offset) ++{ ++ u32 tmp, *ptr; ++ const int len = sizeof(tmp); ++ ++ if (likely(offset >= 0)) { ++ if (headlen - offset >= len) ++ return get_unaligned_be32(data + offset); ++ if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) ++ return be32_to_cpu(tmp); ++ } else { ++ ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); ++ if (likely(ptr)) ++ return get_unaligned_be32(ptr); ++ } ++ ++ return -EFAULT; ++} ++ ++BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb, ++ int, offset) ++{ ++ return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len, ++ offset); ++} ++ ++BPF_CALL_0(bpf_get_raw_cpu_id) + { + return raw_smp_processor_id(); + } + ++static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { ++ .func = bpf_get_raw_cpu_id, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++}; ++ + static u32 
convert_skb_access(int skb_field, int dst_reg, int src_reg, + struct bpf_insn *insn_buf) + { +@@ -178,22 +282,18 @@ static u32 convert_skb_access(int skb_fi + break; + + case SKF_AD_VLAN_TAG: +- case SKF_AD_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); +- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */ + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_tci)); +- if (skb_field == SKF_AD_VLAN_TAG) { +- *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, +- ~VLAN_TAG_PRESENT); +- } else { +- /* dst_reg >>= 12 */ +- *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12); +- /* dst_reg &= 1 */ ++ break; ++ case SKF_AD_VLAN_TAG_PRESENT: ++ *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET()); ++ if (PKT_VLAN_PRESENT_BIT) ++ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT); ++ if (PKT_VLAN_PRESENT_BIT < 7) + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1); +- } + break; + } + +@@ -226,9 +326,8 @@ static bool convert_bpf_extensions(struc + case SKF_AD_OFF + SKF_AD_HATYPE: + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); +- BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0); + +- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), + BPF_REG_TMP, BPF_REG_CTX, + offsetof(struct sk_buff, dev)); + /* if (tmp != 0) goto pc + 1 */ +@@ -295,16 +394,16 @@ static bool convert_bpf_extensions(struc + /* Emit call(arg1=CTX, arg2=A, arg3=X) */ + switch (fp->k) { + case SKF_AD_OFF + SKF_AD_PAY_OFFSET: +- *insn = BPF_EMIT_CALL(__skb_get_pay_offset); ++ *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset); + break; + case SKF_AD_OFF + SKF_AD_NLATTR: +- *insn = BPF_EMIT_CALL(__skb_get_nlattr); ++ *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr); + break; + case SKF_AD_OFF + SKF_AD_NLATTR_NEST: +- *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest); ++ *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest); + break; + case SKF_AD_OFF + SKF_AD_CPU: +- *insn = BPF_EMIT_CALL(__get_raw_cpu_id); ++ *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id); + break; + case SKF_AD_OFF + SKF_AD_RANDOM: + *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); +@@ -331,35 +430,101 @@ static bool convert_bpf_extensions(struc + return true; + } + ++static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp) ++{ ++ const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS); ++ int size = bpf_size_to_bytes(BPF_SIZE(fp->code)); ++ bool endian = BPF_SIZE(fp->code) == BPF_H || ++ BPF_SIZE(fp->code) == BPF_W; ++ bool indirect = BPF_MODE(fp->code) == BPF_IND; ++ const int ip_align = NET_IP_ALIGN; ++ struct bpf_insn *insn = *insnp; ++ int offset = fp->k; ++ ++ if (!indirect && ++ ((unaligned_ok && offset >= 0) || ++ (!unaligned_ok && offset >= 0 && ++ offset + ip_align >= 0 && ++ offset + ip_align % size == 0))) { ++ bool ldx_off_ok = offset <= S16_MAX; ++ ++ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H); ++ if (offset) ++ *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset); ++ *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, ++ size, 2 + endian + (!ldx_off_ok * 2)); ++ if (ldx_off_ok) { ++ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, ++ BPF_REG_D, offset); ++ } else { ++ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D); ++ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset); ++ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, ++ 
BPF_REG_TMP, 0); ++ } ++ if (endian) ++ *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8); ++ *insn++ = BPF_JMP_A(8); ++ } ++ ++ *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); ++ *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D); ++ *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H); ++ if (!indirect) { ++ *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset); ++ } else { ++ *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X); ++ if (fp->k) ++ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset); ++ } ++ ++ switch (BPF_SIZE(fp->code)) { ++ case BPF_B: ++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8); ++ break; ++ case BPF_H: ++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16); ++ break; ++ case BPF_W: ++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32); ++ break; ++ default: ++ return false; ++ } ++ ++ *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2); ++ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); ++ *insn = BPF_EXIT_INSN(); ++ ++ *insnp = insn; ++ return true; ++} ++ + /** + * bpf_convert_filter - convert filter program + * @prog: the user passed filter program + * @len: the length of the user passed filter program +- * @new_prog: buffer where converted program will be stored ++ * @new_prog: allocated 'struct bpf_prog' or NULL + * @new_len: pointer to store length of converted program ++ * @seen_ld_abs: bool whether we've seen ld_abs/ind + * +- * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style. ++ * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn' ++ * style extended BPF (eBPF). + * Conversion workflow: + * + * 1) First pass for calculating the new program length: +- * bpf_convert_filter(old_prog, old_len, NULL, &new_len) ++ * bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs) + * + * 2) 2nd pass to remap in two passes: 1st pass finds new + * jump offsets, 2nd pass remapping: +- * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); +- * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); +- * +- * User BPF's register A is mapped to our BPF register 6, user BPF +- * register X is mapped to BPF register 7; frame pointer is always +- * register 10; Context 'void *ctx' is stored in register 1, that is, +- * for socket filters: ctx == 'struct sk_buff *', for seccomp: +- * ctx == 'struct seccomp_data *'. ++ * bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs) + */ + static int bpf_convert_filter(struct sock_filter *prog, int len, +- struct bpf_insn *new_prog, int *new_len) ++ struct bpf_prog *new_prog, int *new_len, ++ bool *seen_ld_abs) + { +- int new_flen = 0, pass = 0, target, i; +- struct bpf_insn *new_insn; ++ int new_flen = 0, pass = 0, target, i, stack_off; ++ struct bpf_insn *new_insn, *first_insn = NULL; + struct sock_filter *fp; + int *addrs = NULL; + u8 bpf_src; +@@ -371,6 +536,7 @@ static int bpf_convert_filter(struct soc + return -EINVAL; + + if (new_prog) { ++ first_insn = new_prog->insnsi; + addrs = kcalloc(len, sizeof(*addrs), + GFP_KERNEL | __GFP_NOWARN); + if (!addrs) +@@ -378,19 +544,47 @@ static int bpf_convert_filter(struct soc + } + + do_pass: +- new_insn = new_prog; ++ new_insn = first_insn; + fp = prog; + +- if (new_insn) +- *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); +- new_insn++; ++ /* Classic BPF related prologue emission. */ ++ if (new_prog) { ++ /* Classic BPF expects A and X to be reset first. These need ++ * to be guaranteed to be the first two instructions. 
++ */ ++ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); ++ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); ++ ++ /* All programs must keep CTX in callee saved BPF_REG_CTX. ++ * In eBPF case it's done by the compiler, here we need to ++ * do this ourself. Initial CTX is present in BPF_REG_ARG1. ++ */ ++ *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); ++ if (*seen_ld_abs) { ++ /* For packet access in classic BPF, cache skb->data ++ * in callee-saved BPF R8 and skb->len - skb->data_len ++ * (headlen) in BPF R9. Since classic BPF is read-only ++ * on CTX, we only need to cache it once. ++ */ ++ *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), ++ BPF_REG_D, BPF_REG_CTX, ++ offsetof(struct sk_buff, data)); ++ *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX, ++ offsetof(struct sk_buff, len)); ++ *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX, ++ offsetof(struct sk_buff, data_len)); ++ *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP); ++ } ++ } else { ++ new_insn += 3; ++ } + + for (i = 0; i < len; fp++, i++) { +- struct bpf_insn tmp_insns[6] = { }; ++ struct bpf_insn tmp_insns[32] = { }; + struct bpf_insn *insn = tmp_insns; + + if (addrs) +- addrs[i] = new_insn - new_prog; ++ addrs[i] = new_insn - first_insn; + + switch (fp->code) { + /* All arithmetic insns and skb loads map as-is. */ +@@ -429,6 +623,22 @@ do_pass: + BPF_MODE(fp->code) == BPF_ABS && + convert_bpf_extensions(fp, &insn)) + break; ++ if (BPF_CLASS(fp->code) == BPF_LD && ++ convert_bpf_ld_abs(fp, &insn)) { ++ *seen_ld_abs = true; ++ break; ++ } ++ ++ if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || ++ fp->code == (BPF_ALU | BPF_MOD | BPF_X)) { ++ *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); ++ /* Error with exception code on div/mod by 0. ++ * For cBPF programs, this was always return 0. ++ */ ++ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2); ++ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); ++ *insn++ = BPF_EXIT_INSN(); ++ } + + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); + break; +@@ -441,11 +651,18 @@ do_pass: + + #define BPF_EMIT_JMP \ + do { \ ++ const s32 off_min = S16_MIN, off_max = S16_MAX; \ ++ s32 off; \ ++ \ + if (target >= len || target < 0) \ + goto err; \ +- insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ ++ off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ + /* Adjust pc relative offset for 2nd or 3rd insn. */ \ +- insn->off -= insn - tmp_insns; \ ++ off -= insn - tmp_insns; \ ++ /* Reject anything not fitting into insn->off. */ \ ++ if (off < off_min || off > off_max) \ ++ goto err; \ ++ insn->off = off; \ + } while (0) + + case BPF_JMP | BPF_JA: +@@ -487,14 +704,27 @@ do_pass: + break; + } + +- /* Convert JEQ into JNE when 'jump_true' is next insn. */ +- if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { +- insn->code = BPF_JMP | BPF_JNE | bpf_src; ++ /* Convert some jumps when 'jump_true' is next insn. */ ++ if (fp->jt == 0) { ++ switch (BPF_OP(fp->code)) { ++ case BPF_JEQ: ++ insn->code = BPF_JMP | BPF_JNE | bpf_src; ++ break; ++ case BPF_JGT: ++ insn->code = BPF_JMP | BPF_JLE | bpf_src; ++ break; ++ case BPF_JGE: ++ insn->code = BPF_JMP | BPF_JLT | bpf_src; ++ break; ++ default: ++ goto jmp_rest; ++ } ++ + target = i + fp->jf + 1; + BPF_EMIT_JMP; + break; + } +- ++jmp_rest: + /* Other jumps are mapped into two insns: Jxx and JA. 
*/ + target = i + fp->jt + 1; + insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; +@@ -507,44 +737,64 @@ do_pass: + break; + + /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ +- case BPF_LDX | BPF_MSH | BPF_B: +- /* tmp = A */ +- *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); ++ case BPF_LDX | BPF_MSH | BPF_B: { ++ struct sock_filter tmp = { ++ .code = BPF_LD | BPF_ABS | BPF_B, ++ .k = fp->k, ++ }; ++ ++ *seen_ld_abs = true; ++ ++ /* X = A */ ++ *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); + /* A = BPF_R0 = *(u8 *) (skb->data + K) */ +- *insn++ = BPF_LD_ABS(BPF_B, fp->k); ++ convert_bpf_ld_abs(&tmp, &insn); ++ insn++; + /* A &= 0xf */ + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); + /* A <<= 2 */ + *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); ++ /* tmp = X */ ++ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X); + /* X = A */ + *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); + /* A = tmp */ + *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); + break; +- +- /* RET_K, RET_A are remaped into 2 insns. */ ++ } ++ /* RET_K is remaped into 2 insns. RET_A case doesn't need an ++ * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. ++ */ + case BPF_RET | BPF_A: + case BPF_RET | BPF_K: +- *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? +- BPF_K : BPF_X, BPF_REG_0, +- BPF_REG_A, fp->k); ++ if (BPF_RVAL(fp->code) == BPF_K) ++ *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0, ++ 0, fp->k); + *insn = BPF_EXIT_INSN(); + break; + + /* Store to stack. */ + case BPF_ST: + case BPF_STX: ++ stack_off = fp->k * 4 + 4; + *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == + BPF_ST ? BPF_REG_A : BPF_REG_X, +- -(BPF_MEMWORDS - fp->k) * 4); ++ -stack_off); ++ /* check_load_and_stores() verifies that classic BPF can ++ * load from stack only after write, so tracking ++ * stack_depth for ST|STX insns is enough ++ */ ++ if (new_prog && new_prog->aux->stack_depth < stack_off) ++ new_prog->aux->stack_depth = stack_off; + break; + + /* Load from stack. */ + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: ++ stack_off = fp->k * 4 + 4; + *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? + BPF_REG_A : BPF_REG_X, BPF_REG_FP, +- -(BPF_MEMWORDS - fp->k) * 4); ++ -stack_off); + break; + + /* A = K or X = K */ +@@ -592,13 +842,15 @@ do_pass: + + if (!new_prog) { + /* Only calculating new length. */ +- *new_len = new_insn - new_prog; ++ *new_len = new_insn - first_insn; ++ if (*seen_ld_abs) ++ *new_len += 4; /* Prologue bits. 
*/ + return 0; + } + + pass++; +- if (new_flen != new_insn - new_prog) { +- new_flen = new_insn - new_prog; ++ if (new_flen != new_insn - first_insn) { ++ new_flen = new_insn - first_insn; + if (pass > 2) + goto err; + goto do_pass; +@@ -738,6 +990,17 @@ static bool chk_code_allowed(u16 code_to + return codes[code_to_probe]; + } + ++static bool bpf_check_basics_ok(const struct sock_filter *filter, ++ unsigned int flen) ++{ ++ if (filter == NULL) ++ return false; ++ if (flen == 0 || flen > BPF_MAXINSNS) ++ return false; ++ ++ return true; ++} ++ + /** + * bpf_check_classic - verify socket filter code + * @filter: filter to verify +@@ -758,9 +1021,6 @@ static int bpf_check_classic(const struc + bool anc_found; + int pc; + +- if (flen == 0 || flen > BPF_MAXINSNS) +- return -EINVAL; +- + /* Check the filter code now */ + for (pc = 0; pc < flen; pc++) { + const struct sock_filter *ftest = &filter[pc]; +@@ -901,7 +1161,7 @@ static void sk_filter_release_rcu(struct + */ + static void sk_filter_release(struct sk_filter *fp) + { +- if (atomic_dec_and_test(&fp->refcnt)) ++ if (refcount_dec_and_test(&fp->refcnt)) + call_rcu(&fp->rcu, sk_filter_release_rcu); + } + +@@ -916,25 +1176,37 @@ void sk_filter_uncharge(struct sock *sk, + /* try to charge the socket memory if there is space available + * return true on success + */ +-bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) ++static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) + { + u32 filter_size = bpf_prog_size(fp->prog->len); + + /* same check as in sock_kmalloc() */ + if (filter_size <= sysctl_optmem_max && + atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { +- atomic_inc(&fp->refcnt); + atomic_add(filter_size, &sk->sk_omem_alloc); + return true; + } + return false; + } + ++bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) ++{ ++ if (!refcount_inc_not_zero(&fp->refcnt)) ++ return false; ++ ++ if (!__sk_filter_charge(sk, fp)) { ++ sk_filter_release(fp); ++ return false; ++ } ++ return true; ++} ++ + static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) + { + struct sock_filter *old_prog; + struct bpf_prog *old_fp; + int err, new_len, old_len = fp->len; ++ bool seen_ld_abs = false; + + /* We are free to overwrite insns et al right here as it + * won't be used at this point in time anymore internally +@@ -956,7 +1228,8 @@ static struct bpf_prog *bpf_migrate_filt + } + + /* 1st pass: calculate the new program length. */ +- err = bpf_convert_filter(old_prog, old_len, NULL, &new_len); ++ err = bpf_convert_filter(old_prog, old_len, NULL, &new_len, ++ &seen_ld_abs); + if (err) + goto out_err_free; + +@@ -975,7 +1248,8 @@ static struct bpf_prog *bpf_migrate_filt + fp->len = new_len; + + /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ +- err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len); ++ err = bpf_convert_filter(old_prog, old_len, fp, &new_len, ++ &seen_ld_abs); + if (err) + /* 2nd bpf_convert_filter() can fail only if it fails + * to allocate memory, remapping must succeed. Note, +@@ -984,7 +1258,9 @@ static struct bpf_prog *bpf_migrate_filt + */ + goto out_err_free; + +- bpf_prog_select_runtime(fp); ++ fp = bpf_prog_select_runtime(fp, &err); ++ if (err) ++ goto out_err_free; + + kfree(old_prog); + return fp; +@@ -1051,7 +1327,7 @@ int bpf_prog_create(struct bpf_prog **pf + struct bpf_prog *fp; + + /* Make sure new filter is there and in the right amounts. 
*/ +- if (fprog->filter == NULL) ++ if (!bpf_check_basics_ok(fprog->filter, fprog->len)) + return -EINVAL; + + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); +@@ -1098,7 +1374,7 @@ int bpf_prog_create_from_user(struct bpf + int err; + + /* Make sure new filter is there and in the right amounts. */ +- if (fprog->filter == NULL) ++ if (!bpf_check_basics_ok(fprog->filter, fprog->len)) + return -EINVAL; + + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); +@@ -1139,8 +1415,7 @@ void bpf_prog_destroy(struct bpf_prog *f + } + EXPORT_SYMBOL_GPL(bpf_prog_destroy); + +-static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, +- bool locked) ++static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) + { + struct sk_filter *fp, *old_fp; + +@@ -1149,53 +1424,44 @@ static int __sk_attach_prog(struct bpf_p + return -ENOMEM; + + fp->prog = prog; +- atomic_set(&fp->refcnt, 0); + +- if (!sk_filter_charge(sk, fp)) { ++ if (!__sk_filter_charge(sk, fp)) { + kfree(fp); + return -ENOMEM; + } ++ refcount_set(&fp->refcnt, 1); + +- old_fp = rcu_dereference_protected(sk->sk_filter, locked); ++ old_fp = rcu_dereference_protected(sk->sk_filter, ++ lockdep_sock_is_held(sk)); + rcu_assign_pointer(sk->sk_filter, fp); ++ + if (old_fp) + sk_filter_uncharge(sk, old_fp); + + return 0; + } + +-/** +- * sk_attach_filter - attach a socket filter +- * @fprog: the filter program +- * @sk: the socket to use +- * +- * Attach the user's filter code. We first run some sanity checks on +- * it to make sure it does not explode on us later. If an error +- * occurs or there is insufficient memory for the filter a negative +- * errno code is returned. On success the return is zero. +- */ +-int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, +- bool locked) ++static ++struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) + { + unsigned int fsize = bpf_classic_proglen(fprog); +- unsigned int bpf_fsize = bpf_prog_size(fprog->len); + struct bpf_prog *prog; + int err; + + if (sock_flag(sk, SOCK_FILTER_LOCKED)) +- return -EPERM; ++ return ERR_PTR(-EPERM); + + /* Make sure new filter is there and in the right amounts. */ +- if (fprog->filter == NULL) +- return -EINVAL; ++ if (!bpf_check_basics_ok(fprog->filter, fprog->len)) ++ return ERR_PTR(-EINVAL); + +- prog = bpf_prog_alloc(bpf_fsize, 0); ++ prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); + if (!prog) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + if (copy_from_user(prog->insns, fprog->filter, fsize)) { + __bpf_prog_free(prog); +- return -EFAULT; ++ return ERR_PTR(-EINVAL); + } + + prog->len = fprog->len; +@@ -1203,17 +1469,34 @@ int __sk_attach_filter(struct sock_fprog + err = bpf_prog_store_orig_filter(prog, fprog); + if (err) { + __bpf_prog_free(prog); +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + } + + /* bpf_prepare_filter() already takes care of freeing + * memory in case something goes wrong. + */ +- prog = bpf_prepare_filter(prog, NULL); ++ return bpf_prepare_filter(prog, NULL); ++} ++ ++/** ++ * sk_attach_filter - attach a socket filter ++ * @fprog: the filter program ++ * @sk: the socket to use ++ * ++ * Attach the user's filter code. We first run some sanity checks on ++ * it to make sure it does not explode on us later. If an error ++ * occurs or there is insufficient memory for the filter a negative ++ * errno code is returned. On success the return is zero. 
++ */ ++int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) ++{ ++ struct bpf_prog *prog = __get_filter(fprog, sk); ++ int err; ++ + if (IS_ERR(prog)) + return PTR_ERR(prog); + +- err = __sk_attach_prog(prog, sk, locked); ++ err = __sk_attach_prog(prog, sk); + if (err < 0) { + __bpf_prog_release(prog); + return err; +@@ -1221,31 +1504,25 @@ int __sk_attach_filter(struct sock_fprog + + return 0; + } +-EXPORT_SYMBOL_GPL(__sk_attach_filter); ++EXPORT_SYMBOL_GPL(sk_attach_filter); + +-int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) ++static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) + { +- return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); ++ if (sock_flag(sk, SOCK_FILTER_LOCKED)) ++ return ERR_PTR(-EPERM); ++ ++ return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); + } + + int sk_attach_bpf(u32 ufd, struct sock *sk) + { +- struct bpf_prog *prog; ++ struct bpf_prog *prog = __get_bpf(ufd, sk); + int err; + +- if (sock_flag(sk, SOCK_FILTER_LOCKED)) +- return -EPERM; +- +- prog = bpf_prog_get(ufd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + +- if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { +- bpf_prog_put(prog); +- return -EINVAL; +- } +- +- err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); ++ err = __sk_attach_prog(prog, sk); + if (err < 0) { + bpf_prog_put(prog); + return err; +@@ -1254,79 +1531,201 @@ int sk_attach_bpf(u32 ufd, struct sock * + return 0; + } + +-#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) ++struct bpf_scratchpad { ++ union { ++ __be32 diff[MAX_BPF_STACK / sizeof(__be32)]; ++ u8 buff[MAX_BPF_STACK]; ++ }; ++}; ++ ++static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); ++ ++static inline int __bpf_try_make_writable(struct sk_buff *skb, ++ unsigned int write_len) ++{ ++ return skb_ensure_writable(skb, write_len); ++} ++ ++static inline int bpf_try_make_writable(struct sk_buff *skb, ++ unsigned int write_len) ++{ ++ int err = __bpf_try_make_writable(skb, write_len); ++ ++ bpf_compute_data_pointers(skb); ++ return err; ++} ++ ++static int bpf_try_make_head_writable(struct sk_buff *skb) ++{ ++ return bpf_try_make_writable(skb, skb_headlen(skb)); ++} + +-static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) ++static inline void bpf_push_mac_rcsum(struct sk_buff *skb) ++{ ++ if (skb_at_tc_ingress(skb)) ++ skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); ++} ++ ++static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) ++{ ++ if (skb_at_tc_ingress(skb)) ++ skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len); ++} ++ ++BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, ++ const void *, from, u32, len, u64, flags) + { +- struct sk_buff *skb = (struct sk_buff *) (long) r1; +- int offset = (int) r2; +- void *from = (void *) (long) r3; +- unsigned int len = (unsigned int) r4; +- char buf[16]; + void *ptr; + +- /* bpf verifier guarantees that: +- * 'from' pointer points to bpf program stack +- * 'len' bytes of it were initialized +- * 'len' > 0 +- * 'skb' is a valid pointer to 'struct sk_buff' +- * +- * so check for invalid 'offset' and too large 'len' +- */ +- if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) ++ if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) ++ return -EINVAL; ++ if (unlikely(offset > 0xffff)) + return -EFAULT; +- if (unlikely(skb_try_make_writable(skb, offset + len))) ++ if (unlikely(bpf_try_make_writable(skb, offset + len))) + return -EFAULT; + +- ptr = skb_header_pointer(skb, offset, len, buf); +- if 
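[Editorial aside, not part of the patch] For context on what sk_attach_filter() above consumes: userspace still hands the kernel a classic struct sock_fprog through setsockopt(SO_ATTACH_FILTER), and bpf_prepare_filter() converts it to eBPF internally. A self-contained userspace sketch using only uapi headers; the accept-all filter is purely illustrative:

#include <linux/filter.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

#ifndef SO_ATTACH_FILTER
#define SO_ATTACH_FILTER 26   /* value from asm-generic/socket.h */
#endif

int main(void)
{
    /* cBPF: unconditionally return 0xffffffff, i.e. accept the packet. */
    struct sock_filter insns[] = {
        BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
    };
    struct sock_fprog prog = {
        .len = sizeof(insns) / sizeof(insns[0]),
        .filter = insns,
    };
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                             &prog, sizeof(prog)) < 0) {
        perror("SO_ATTACH_FILTER");
        return 1;
    }
    close(fd);
    return 0;
}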
(unlikely(!ptr)) +- return -EFAULT; +- +- if (BPF_RECOMPUTE_CSUM(flags)) +- skb_postpull_rcsum(skb, ptr, len); ++ ptr = skb->data + offset; ++ if (flags & BPF_F_RECOMPUTE_CSUM) ++ __skb_postpull_rcsum(skb, ptr, len, offset); + + memcpy(ptr, from, len); + +- if (ptr == buf) +- /* skb_store_bits cannot return -EFAULT here */ +- skb_store_bits(skb, offset, ptr, len); ++ if (flags & BPF_F_RECOMPUTE_CSUM) ++ __skb_postpush_rcsum(skb, ptr, len, offset); ++ if (flags & BPF_F_INVALIDATE_HASH) ++ skb_clear_hash(skb); + +- if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE) +- skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); + return 0; + } + +-const struct bpf_func_proto bpf_skb_store_bytes_proto = { ++static const struct bpf_func_proto bpf_skb_store_bytes_proto = { + .func = bpf_skb_store_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +- .arg3_type = ARG_PTR_TO_STACK, +- .arg4_type = ARG_CONST_STACK_SIZE, ++ .arg3_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_CONST_SIZE, + .arg5_type = ARG_ANYTHING, + }; + +-#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) +-#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) ++BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, ++ void *, to, u32, len) ++{ ++ void *ptr; ++ ++ if (unlikely(offset > 0xffff)) ++ goto err_clear; ++ ++ ptr = skb_header_pointer(skb, offset, len, to); ++ if (unlikely(!ptr)) ++ goto err_clear; ++ if (ptr != to) ++ memcpy(to, ptr, len); + +-static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) ++ return 0; ++err_clear: ++ memset(to, 0, len); ++ return -EFAULT; ++} ++ ++static const struct bpf_func_proto bpf_skb_load_bytes_proto = { ++ .func = bpf_skb_load_bytes, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_PTR_TO_UNINIT_MEM, ++ .arg4_type = ARG_CONST_SIZE, ++}; ++ ++BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, ++ u32, offset, void *, to, u32, len, u32, start_header) + { +- struct sk_buff *skb = (struct sk_buff *) (long) r1; +- int offset = (int) r2; +- __sum16 sum, *ptr; ++ u8 *end = skb_tail_pointer(skb); ++ u8 *start, *ptr; + +- if (unlikely((u32) offset > 0xffff)) +- return -EFAULT; ++ if (unlikely(offset > 0xffff)) ++ goto err_clear; + +- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) +- return -EFAULT; ++ switch (start_header) { ++ case BPF_HDR_START_MAC: ++ if (unlikely(!skb_mac_header_was_set(skb))) ++ goto err_clear; ++ start = skb_mac_header(skb); ++ break; ++ case BPF_HDR_START_NET: ++ start = skb_network_header(skb); ++ break; ++ default: ++ goto err_clear; ++ } + +- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); +- if (unlikely(!ptr)) ++ ptr = start + offset; ++ ++ if (likely(ptr + len <= end)) { ++ memcpy(to, ptr, len); ++ return 0; ++ } ++ ++err_clear: ++ memset(to, 0, len); ++ return -EFAULT; ++} ++ ++static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = { ++ .func = bpf_skb_load_bytes_relative, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_PTR_TO_UNINIT_MEM, ++ .arg4_type = ARG_CONST_SIZE, ++ .arg5_type = ARG_ANYTHING, ++}; ++ ++BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) ++{ ++ /* Idea is the following: should the needed direct read/write ++ * test fail during runtime, we can pull in more data and redo ++ * again, since 
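[Editorial aside, not part of the patch] bpf_skb_load_bytes() and bpf_skb_store_bytes(), wired up above, are the generic read/write helpers exposed to tc programs. A hedged sketch of typical usage, assuming a libbpf-style build (clang -target bpf); the section name and the rewritten MAC address are illustrative, and the destination MAC is chosen because it needs no checksum fixup:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int rewrite_dmac(struct __sk_buff *skb)
{
    __u8 new_dmac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
    __be16 proto;

    /* EtherType sits right after the two 6-byte MAC addresses. */
    if (bpf_skb_load_bytes(skb, 2 * ETH_ALEN, &proto, sizeof(proto)) < 0)
        return TC_ACT_OK;
    if (proto != bpf_htons(ETH_P_IP))
        return TC_ACT_OK;

    /* Destination MAC is the first field; no checksum fixup needed. */
    bpf_skb_store_bytes(skb, 0, new_dmac, ETH_ALEN, 0);
    return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";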
implicitly, we invalidate previous checks here. ++ * ++ * Or, since we know how much we need to make read/writeable, ++ * this can be done once at the program beginning for direct ++ * access case. By this we overcome limitations of only current ++ * headroom being accessible. ++ */ ++ return bpf_try_make_writable(skb, len ? : skb_headlen(skb)); ++} ++ ++static const struct bpf_func_proto bpf_skb_pull_data_proto = { ++ .func = bpf_skb_pull_data, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++}; ++ ++BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, ++ u64, from, u64, to, u64, flags) ++{ ++ __sum16 *ptr; ++ ++ if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) ++ return -EINVAL; ++ if (unlikely(offset > 0xffff || offset & 1)) ++ return -EFAULT; ++ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) + return -EFAULT; + +- switch (BPF_HEADER_FIELD_SIZE(flags)) { ++ ptr = (__sum16 *)(skb->data + offset); ++ switch (flags & BPF_F_HDR_FIELD_MASK) { ++ case 0: ++ if (unlikely(from != 0)) ++ return -EINVAL; ++ ++ csum_replace_by_diff(ptr, to); ++ break; + case 2: + csum_replace2(ptr, from, to); + break; +@@ -1337,14 +1736,10 @@ static u64 bpf_l3_csum_replace(u64 r1, u + return -EINVAL; + } + +- if (ptr == &sum) +- /* skb_store_bits guaranteed to not return -EFAULT here */ +- skb_store_bits(skb, offset, ptr, sizeof(sum)); +- + return 0; + } + +-const struct bpf_func_proto bpf_l3_csum_replace_proto = { ++static const struct bpf_func_proto bpf_l3_csum_replace_proto = { + .func = bpf_l3_csum_replace, + .gpl_only = false, + .ret_type = RET_INTEGER, +@@ -1355,23 +1750,33 @@ const struct bpf_func_proto bpf_l3_csum_ + .arg5_type = ARG_ANYTHING, + }; + +-static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) ++BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, ++ u64, from, u64, to, u64, flags) + { +- struct sk_buff *skb = (struct sk_buff *) (long) r1; +- bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags); +- int offset = (int) r2; +- __sum16 sum, *ptr; ++ bool is_pseudo = flags & BPF_F_PSEUDO_HDR; ++ bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; ++ bool do_mforce = flags & BPF_F_MARK_ENFORCE; ++ __sum16 *ptr; + +- if (unlikely((u32) offset > 0xffff)) ++ if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | ++ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) ++ return -EINVAL; ++ if (unlikely(offset > 0xffff || offset & 1)) + return -EFAULT; +- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) ++ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) + return -EFAULT; + +- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); +- if (unlikely(!ptr)) +- return -EFAULT; ++ ptr = (__sum16 *)(skb->data + offset); ++ if (is_mmzero && !do_mforce && !*ptr) ++ return 0; + +- switch (BPF_HEADER_FIELD_SIZE(flags)) { ++ switch (flags & BPF_F_HDR_FIELD_MASK) { ++ case 0: ++ if (unlikely(from != 0)) ++ return -EINVAL; ++ ++ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); ++ break; + case 2: + inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); + break; +@@ -1382,14 +1787,12 @@ static u64 bpf_l4_csum_replace(u64 r1, u + return -EINVAL; + } + +- if (ptr == &sum) +- /* skb_store_bits guaranteed to not return -EFAULT here */ +- skb_store_bits(skb, offset, ptr, sizeof(sum)); +- ++ if (is_mmzero && !*ptr) ++ *ptr = CSUM_MANGLED_0; + return 0; + } + +-const struct bpf_func_proto bpf_l4_csum_replace_proto = { ++static const struct bpf_func_proto 
bpf_l4_csum_replace_proto = { + .func = bpf_l4_csum_replace, + .gpl_only = false, + .ret_type = RET_INTEGER, +@@ -1400,30 +1803,172 @@ const struct bpf_func_proto bpf_l4_csum_ + .arg5_type = ARG_ANYTHING, + }; + +-#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1) ++BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, ++ __be32 *, to, u32, to_size, __wsum, seed) ++{ ++ struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); ++ u32 diff_size = from_size + to_size; ++ int i, j = 0; ++ ++ /* This is quite flexible, some examples: ++ * ++ * from_size == 0, to_size > 0, seed := csum --> pushing data ++ * from_size > 0, to_size == 0, seed := csum --> pulling data ++ * from_size > 0, to_size > 0, seed := 0 --> diffing data ++ * ++ * Even for diffing, from_size and to_size don't need to be equal. ++ */ ++ if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) || ++ diff_size > sizeof(sp->diff))) ++ return -EINVAL; ++ ++ for (i = 0; i < from_size / sizeof(__be32); i++, j++) ++ sp->diff[j] = ~from[i]; ++ for (i = 0; i < to_size / sizeof(__be32); i++, j++) ++ sp->diff[j] = to[i]; ++ ++ return csum_partial(sp->diff, diff_size, seed); ++} ++ ++static const struct bpf_func_proto bpf_csum_diff_proto = { ++ .func = bpf_csum_diff, ++ .gpl_only = false, ++ .pkt_access = true, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_MEM_OR_NULL, ++ .arg2_type = ARG_CONST_SIZE_OR_ZERO, ++ .arg3_type = ARG_PTR_TO_MEM_OR_NULL, ++ .arg4_type = ARG_CONST_SIZE_OR_ZERO, ++ .arg5_type = ARG_ANYTHING, ++}; ++ ++BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum) ++{ ++ /* The interface is to be used in combination with bpf_csum_diff() ++ * for direct packet writes. csum rotation for alignment as well ++ * as emulating csum_sub() can be done from the eBPF program. ++ */ ++ if (skb->ip_summed == CHECKSUM_COMPLETE) ++ return (skb->csum = csum_add(skb->csum, csum)); ++ ++ return -ENOTSUPP; ++} ++ ++static const struct bpf_func_proto bpf_csum_update_proto = { ++ .func = bpf_csum_update, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++}; ++ ++static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) ++{ ++ return dev_forward_skb(dev, skb); ++} ++ ++static inline int __bpf_rx_skb_no_mac(struct net_device *dev, ++ struct sk_buff *skb) ++{ ++ int ret = ____dev_forward_skb(dev, skb); ++ ++ if (likely(!ret)) { ++ skb->dev = dev; ++ ret = netif_rx(skb); ++ } ++ ++ return ret; ++} ++ ++static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) ++{ ++ int ret; ++ ++ skb->dev = dev; ++ skb->tstamp.tv64 = 0; ++ ++ ret = dev_queue_xmit(skb); ++ ++ return ret; ++} ++ ++static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, ++ u32 flags) ++{ ++ unsigned int mlen = skb_network_offset(skb); ++ ++ if (mlen) { ++ __skb_pull(skb, mlen); ++ ++ /* At ingress, the mac header has already been pulled once. ++ * At egress, skb_pospull_rcsum has to be done in case that ++ * the skb is originated from ingress (i.e. a forwarded skb) ++ * to ensure that rcsum starts at net header. ++ */ ++ if (!skb_at_tc_ingress(skb)) ++ skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); ++ } ++ skb_pop_mac_header(skb); ++ skb_reset_mac_len(skb); ++ return flags & BPF_F_INGRESS ? 
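[Editorial aside, not part of the patch] bpf_csum_diff(), bpf_l3_csum_replace() and bpf_l4_csum_replace() above are meant to be combined for packet rewrites: store the new bytes, then patch the affected checksums incrementally. A sketch of the common IPv4 destination-address rewrite, assuming a fixed 20-byte IPv4 header carrying TCP (a real program would parse and validate the headers first); the offsets and the 10.0.0.2 address are illustrative:

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define IP_DST_OFF   (ETH_HLEN + offsetof(struct iphdr, daddr))
#define IP_CSUM_OFF  (ETH_HLEN + offsetof(struct iphdr, check))
#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check))

SEC("tc")
int rewrite_daddr(struct __sk_buff *skb)
{
    __be32 new_ip = bpf_htonl(0x0a000002);   /* 10.0.0.2, illustrative */
    __be32 old_ip;

    if (bpf_skb_load_bytes(skb, IP_DST_OFF, &old_ip, sizeof(old_ip)) < 0)
        return TC_ACT_OK;

    /* Patch the TCP checksum (its pseudo header covers daddr), then the
     * IPv4 header checksum, then store the new address itself.
     */
    bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip,
                        BPF_F_PSEUDO_HDR | sizeof(new_ip));
    bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
    bpf_skb_store_bytes(skb, IP_DST_OFF, &new_ip, sizeof(new_ip), 0);
    return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";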
++ __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb); ++} + +-static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) ++static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, ++ u32 flags) ++{ ++ /* Verify that a link layer header is carried */ ++ if (unlikely(skb->mac_header >= skb->network_header)) { ++ kfree_skb(skb); ++ return -ERANGE; ++ } ++ ++ bpf_push_mac_rcsum(skb); ++ return flags & BPF_F_INGRESS ? ++ __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); ++} ++ ++static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, ++ u32 flags) ++{ ++ if (dev_is_mac_header_xmit(dev)) ++ return __bpf_redirect_common(skb, dev, flags); ++ else ++ return __bpf_redirect_no_mac(skb, dev, flags); ++} ++ ++BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) + { +- struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; + struct net_device *dev; ++ struct sk_buff *clone; ++ int ret; ++ ++ if (unlikely(flags & ~(BPF_F_INGRESS))) ++ return -EINVAL; + + dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); + if (unlikely(!dev)) + return -EINVAL; + +- skb2 = skb_clone(skb, GFP_ATOMIC); +- if (unlikely(!skb2)) ++ clone = skb_clone(skb, GFP_ATOMIC); ++ if (unlikely(!clone)) + return -ENOMEM; + +- if (BPF_IS_REDIRECT_INGRESS(flags)) +- return dev_forward_skb(dev, skb2); ++ /* For direct write, we need to keep the invariant that the skbs ++ * we're dealing with need to be uncloned. Should uncloning fail ++ * here, we need to free the just generated clone to unclone once ++ * again. ++ */ ++ ret = bpf_try_make_head_writable(skb); ++ if (unlikely(ret)) { ++ kfree_skb(clone); ++ return -ENOMEM; ++ } + +- skb2->dev = dev; +- skb_sender_cpu_clear(skb2); +- return dev_queue_xmit(skb2); ++ return __bpf_redirect(clone, dev, flags); + } + +-const struct bpf_func_proto bpf_clone_redirect_proto = { ++static const struct bpf_func_proto bpf_clone_redirect_proto = { + .func = bpf_clone_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, +@@ -1432,42 +1977,38 @@ const struct bpf_func_proto bpf_clone_re + .arg3_type = ARG_ANYTHING, + }; + +-struct redirect_info { +- u32 ifindex; +- u32 flags; +-}; ++DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); ++EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); + +-static DEFINE_PER_CPU(struct redirect_info, redirect_info); +-static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) ++BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) + { +- struct redirect_info *ri = this_cpu_ptr(&redirect_info); ++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ++ ++ if (unlikely(flags & ~(BPF_F_INGRESS))) ++ return TC_ACT_SHOT; + +- ri->ifindex = ifindex; + ri->flags = flags; ++ ri->tgt_index = ifindex; ++ + return TC_ACT_REDIRECT; + } + + int skb_do_redirect(struct sk_buff *skb) + { +- struct redirect_info *ri = this_cpu_ptr(&redirect_info); ++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct net_device *dev; + +- dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); +- ri->ifindex = 0; ++ dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index); ++ ri->tgt_index = 0; + if (unlikely(!dev)) { + kfree_skb(skb); + return -EINVAL; + } + +- if (BPF_IS_REDIRECT_INGRESS(ri->flags)) +- return dev_forward_skb(dev, skb); +- +- skb->dev = dev; +- skb_sender_cpu_clear(skb); +- return dev_queue_xmit(skb); ++ return __bpf_redirect(skb, dev, ri->flags); + } + +-const struct bpf_func_proto bpf_redirect_proto = { ++static const struct 
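[Editorial aside, not part of the patch] The redirect plumbing above splits into two user-visible helpers: bpf_clone_redirect() transmits a copy synchronously from inside the program, while bpf_redirect() only records the target in the per-CPU bpf_redirect_info and relies on the TC_ACT_REDIRECT return value that skb_do_redirect() later consumes. A brief sketch; TARGET_IFINDEX is a placeholder:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

/* Illustrative target; a real program would look this up in a map. */
#define TARGET_IFINDEX 4

SEC("tc")
int mirror_then_redirect(struct __sk_buff *skb)
{
    /* Transmits a clone right away and keeps running the program... */
    bpf_clone_redirect(skb, TARGET_IFINDEX, 0 /* egress */);

    /* ...whereas bpf_redirect() only records the target per CPU; the
     * actual transmit happens once TC_ACT_REDIRECT is returned and
     * skb_do_redirect() picks the ifindex back up.
     */
    return bpf_redirect(TARGET_IFINDEX, 0);
}

char _license[] SEC("license") = "GPL";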
bpf_func_proto bpf_redirect_proto = { + .func = bpf_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, +@@ -1475,50 +2016,75 @@ const struct bpf_func_proto bpf_redirect + .arg2_type = ARG_ANYTHING, + }; + +-static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb) + { +- return task_get_classid((struct sk_buff *) (unsigned long) r1); ++ /* If skb_clear_hash() was called due to mangling, we can ++ * trigger SW recalculation here. Later access to hash ++ * can then use the inline skb->hash via context directly ++ * instead of calling this helper again. ++ */ ++ return skb_get_hash(skb); + } + +-static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { +- .func = bpf_get_cgroup_classid, +- .gpl_only = false, +- .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_CTX, ++static const struct bpf_func_proto bpf_get_hash_recalc_proto = { ++ .func = bpf_get_hash_recalc, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, + }; + +-static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb) + { +-#ifdef CONFIG_IP_ROUTE_CLASSID +- const struct dst_entry *dst; ++ /* After all direct packet write, this can be used once for ++ * triggering a lazy recalc on next skb_get_hash() invocation. ++ */ ++ skb_clear_hash(skb); ++ return 0; ++} + +- dst = skb_dst((struct sk_buff *) (unsigned long) r1); +- if (dst) +- return dst->tclassid; +-#endif ++static const struct bpf_func_proto bpf_set_hash_invalid_proto = { ++ .func = bpf_set_hash_invalid, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++}; ++ ++BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash) ++{ ++ /* Set user specified hash as L4(+), so that it gets returned ++ * on skb_get_hash() call unless BPF prog later on triggers a ++ * skb_clear_hash(). 
++ */ ++ __skb_set_sw_hash(skb, hash, true); + return 0; + } + +-static const struct bpf_func_proto bpf_get_route_realm_proto = { +- .func = bpf_get_route_realm, +- .gpl_only = false, +- .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_CTX, ++static const struct bpf_func_proto bpf_set_hash_proto = { ++ .func = bpf_set_hash, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, + }; + +-static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) ++BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, ++ u16, vlan_tci) + { +- struct sk_buff *skb = (struct sk_buff *) (long) r1; +- __be16 vlan_proto = (__force __be16) r2; ++ int ret; + + if (unlikely(vlan_proto != htons(ETH_P_8021Q) && + vlan_proto != htons(ETH_P_8021AD))) + vlan_proto = htons(ETH_P_8021Q); + +- return skb_vlan_push(skb, vlan_proto, vlan_tci); ++ bpf_push_mac_rcsum(skb); ++ ret = skb_vlan_push(skb, vlan_proto, vlan_tci); ++ bpf_pull_mac_rcsum(skb); ++ ++ bpf_compute_data_pointers(skb); ++ return ret; + } + +-const struct bpf_func_proto bpf_skb_vlan_push_proto = { ++static const struct bpf_func_proto bpf_skb_vlan_push_proto = { + .func = bpf_skb_vlan_push, + .gpl_only = false, + .ret_type = RET_INTEGER, +@@ -1526,116 +2092,401 @@ const struct bpf_func_proto bpf_skb_vlan + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + }; +-EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto); + +-static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) + { +- struct sk_buff *skb = (struct sk_buff *) (long) r1; ++ int ret; ++ ++ bpf_push_mac_rcsum(skb); ++ ret = skb_vlan_pop(skb); ++ bpf_pull_mac_rcsum(skb); + +- return skb_vlan_pop(skb); ++ bpf_compute_data_pointers(skb); ++ return ret; + } + +-const struct bpf_func_proto bpf_skb_vlan_pop_proto = { ++static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { + .func = bpf_skb_vlan_pop, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + }; +-EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); + +-bool bpf_helper_changes_skb_data(void *func) ++BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type) + { +- if (func == bpf_skb_vlan_push) +- return true; +- if (func == bpf_skb_vlan_pop) +- return true; +- if (func == bpf_skb_store_bytes) +- return true; +- if (func == bpf_l3_csum_replace) +- return true; +- if (func == bpf_l4_csum_replace) +- return true; ++ /* We only allow a restricted subset to be changed for now. 
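[Editorial aside, not part of the patch] bpf_skb_vlan_push()/bpf_skb_vlan_pop() above end with bpf_compute_data_pointers() because any helper that moves packet data invalidates pointers previously derived from the context. A minimal sketch of pushing an 802.1Q tag from tc; VLAN ID 100 is arbitrary:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int tag_vlan100(struct __sk_buff *skb)
{
    /* Pushing the tag moves packet data, so any pointer previously
     * derived from the context is invalid afterwards -- the kernel
     * side reflects this with bpf_compute_data_pointers() above.
     */
    if (bpf_skb_vlan_push(skb, bpf_htons(ETH_P_8021Q), 100))
        return TC_ACT_SHOT;
    return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";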
*/ ++ if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || ++ !skb_pkt_type_ok(pkt_type))) ++ return -EINVAL; + +- return false; ++ skb->pkt_type = pkt_type; ++ return 0; ++} ++ ++static const struct bpf_func_proto bpf_skb_change_type_proto = { ++ .func = bpf_skb_change_type, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++}; ++ ++#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \ ++ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) ++ ++#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \ ++ BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ ++ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ ++ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ ++ BPF_F_ADJ_ROOM_ENCAP_L2( \ ++ BPF_ADJ_ROOM_ENCAP_L2_MASK)) ++ ++#define BPF_SKB_MAX_LEN SKB_MAX_ALLOC ++ ++static u32 __bpf_skb_min_len(const struct sk_buff *skb) ++{ ++ u32 min_len = skb_network_offset(skb); ++ ++ if (skb_transport_header_was_set(skb)) ++ min_len = skb_transport_offset(skb); ++ if (skb->ip_summed == CHECKSUM_PARTIAL) ++ min_len = skb_checksum_start_offset(skb) + ++ skb->csum_offset + sizeof(__sum16); ++ return min_len; + } + +-static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) ++static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) + { +- struct sk_buff *skb = (struct sk_buff *) (long) r1; +- struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; +- struct ip_tunnel_info *info = skb_tunnel_info(skb); ++ unsigned int old_len = skb->len; ++ int ret; + +- if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info)) ++ ret = __skb_grow_rcsum(skb, new_len); ++ if (!ret) ++ memset(skb->data + old_len, 0, new_len - old_len); ++ return ret; ++} ++ ++static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) ++{ ++ return __skb_trim_rcsum(skb, new_len); ++} ++ ++static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len, ++ u64 flags) ++{ ++ u32 max_len = BPF_SKB_MAX_LEN; ++ u32 min_len = __bpf_skb_min_len(skb); ++ int ret; ++ ++ if (unlikely(flags || new_len > max_len || new_len < min_len)) + return -EINVAL; +- if (ip_tunnel_info_af(info) != AF_INET) ++ if (skb->encapsulation) ++ return -ENOTSUPP; ++ ++ /* The basic idea of this helper is that it's performing the ++ * needed work to either grow or trim an skb, and eBPF program ++ * rewrites the rest via helpers like bpf_skb_store_bytes(), ++ * bpf_lX_csum_replace() and others rather than passing a raw ++ * buffer here. This one is a slow path helper and intended ++ * for replies with control messages. ++ * ++ * Like in bpf_skb_change_proto(), we want to keep this rather ++ * minimal and without protocol specifics so that we are able ++ * to separate concerns as in bpf_skb_store_bytes() should only ++ * be the one responsible for writing buffers. ++ * ++ * It's really expected to be a slow path operation here for ++ * control message replies, so we're implicitly linearizing, ++ * uncloning and drop offloads from the skb by this. 
++ */ ++ ret = __bpf_try_make_writable(skb, skb->len); ++ if (!ret) { ++ if (new_len > skb->len) ++ ret = bpf_skb_grow_rcsum(skb, new_len); ++ else if (new_len < skb->len) ++ ret = bpf_skb_trim_rcsum(skb, new_len); ++ if (!ret && skb_is_gso(skb)) ++ skb_gso_reset(skb); ++ } ++ return ret; ++} ++ ++BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, ++ u64, flags) ++{ ++ int ret = __bpf_skb_change_tail(skb, new_len, flags); ++ ++ bpf_compute_data_pointers(skb); ++ return ret; ++} ++ ++static const struct bpf_func_proto bpf_skb_change_tail_proto = { ++ .func = bpf_skb_change_tail, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_ANYTHING, ++}; ++ ++static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, ++ u64 flags) ++{ ++ u32 max_len = BPF_SKB_MAX_LEN; ++ u32 new_len = skb->len + head_room; ++ int ret; ++ ++ if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) || ++ new_len < skb->len)) + return -EINVAL; + +- to->tunnel_id = be64_to_cpu(info->key.tun_id); +- to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); ++ ret = skb_cow(skb, head_room); ++ if (likely(!ret)) { ++ /* Idea for this helper is that we currently only ++ * allow to expand on mac header. This means that ++ * skb->protocol network header, etc, stay as is. ++ * Compared to bpf_skb_change_tail(), we're more ++ * flexible due to not needing to linearize or ++ * reset GSO. Intention for this helper is to be ++ * used by an L3 skb that needs to push mac header ++ * for redirection into L2 device. ++ */ ++ __skb_push(skb, head_room); ++ memset(skb->data, 0, head_room); ++ skb_reset_mac_header(skb); ++ skb_reset_mac_len(skb); ++ } + +- return 0; ++ return ret; + } + +-const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { +- .func = bpf_skb_get_tunnel_key, ++BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, ++ u64, flags) ++{ ++ int ret = __bpf_skb_change_head(skb, head_room, flags); ++ ++ bpf_compute_data_pointers(skb); ++ return ret; ++} ++ ++static const struct bpf_func_proto bpf_skb_change_head_proto = { ++ .func = bpf_skb_change_head, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_STACK, +- .arg3_type = ARG_CONST_STACK_SIZE, +- .arg4_type = ARG_ANYTHING, ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_ANYTHING, + }; + +-static struct metadata_dst __percpu *md_dst; ++void bpf_clear_redirect_map(struct bpf_map *map) ++{ ++ struct bpf_redirect_info *ri; ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ ri = per_cpu_ptr(&bpf_redirect_info, cpu); ++ /* Avoid polluting remote cacheline due to writes if ++ * not needed. Once we pass this test, we need the ++ * cmpxchg() to make sure it hasn't been changed in ++ * the meantime by remote CPU. 
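[Editorial aside, not part of the patch] The comment above describes bpf_skb_change_tail() as a slow-path helper for control-message style replies: it linearizes, unclones and drops offloads, and the program is expected to rewrite the contents afterwards via bpf_skb_store_bytes() and the checksum helpers. A short sketch that trims oversized frames to a fixed reply length; 96 bytes is arbitrary:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define REPLY_LEN 96   /* arbitrary fixed reply size */

SEC("tc")
int trim_reply(struct __sk_buff *skb)
{
    /* Slow-path helper: shrinking linearizes/unclones and drops GSO
     * state; any payload rewrite afterwards should go through
     * bpf_skb_store_bytes().
     */
    if (skb->len > REPLY_LEN)
        bpf_skb_change_tail(skb, REPLY_LEN, 0);
    return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";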
++ */ ++ if (unlikely(READ_ONCE(ri->map) == map)) ++ cmpxchg(&ri->map, map, NULL); ++ } ++} ++ ++static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, ++ unsigned long off, unsigned long len) ++{ ++ void *ptr = skb_header_pointer(skb, off, len, dst_buff); ++ ++ if (unlikely(!ptr)) ++ return len; ++ if (ptr != dst_buff) ++ memcpy(dst_buff, ptr, len); ++ ++ return 0; ++} + +-static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) ++BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, ++ u64, flags, void *, meta, u64, meta_size) + { +- struct sk_buff *skb = (struct sk_buff *) (long) r1; +- struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2; +- struct metadata_dst *md = this_cpu_ptr(md_dst); +- struct ip_tunnel_info *info; ++ u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; + +- if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags)) ++ if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) + return -EINVAL; ++ if (unlikely(skb_size > skb->len)) ++ return -EFAULT; + +- skb_dst_drop(skb); +- dst_hold((struct dst_entry *) md); +- skb_dst_set(skb, (struct dst_entry *) md); ++ return bpf_event_output(map, flags, meta, meta_size, skb, skb_size, ++ bpf_skb_copy); ++} + +- info = &md->u.tun_info; +- info->mode = IP_TUNNEL_INFO_TX; +- info->key.tun_flags = TUNNEL_KEY; +- info->key.tun_id = cpu_to_be64(from->tunnel_id); +- info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); ++static const struct bpf_func_proto bpf_skb_event_output_proto = { ++ .func = bpf_skb_event_output, ++ .gpl_only = true, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_CONST_MAP_PTR, ++ .arg3_type = ARG_ANYTHING, ++ .arg4_type = ARG_PTR_TO_MEM, ++ .arg5_type = ARG_CONST_SIZE_OR_ZERO, ++}; ++ ++ ++const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; ++EXPORT_SYMBOL_GPL(ipv6_bpf_stub); ++ ++#ifdef CONFIG_XFRM ++BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index, ++ struct bpf_xfrm_state *, to, u32, size, u64, flags) ++{ ++ const struct sec_path *sp = skb_sec_path(skb); ++ const struct xfrm_state *x; ++ ++ if (!sp || unlikely(index >= sp->len || flags)) ++ goto err_clear; ++ ++ x = sp->xvec[index]; ++ ++ if (unlikely(size != sizeof(struct bpf_xfrm_state))) ++ goto err_clear; ++ ++ to->reqid = x->props.reqid; ++ to->spi = x->id.spi; ++ to->family = x->props.family; ++ to->ext = 0; ++ ++ if (to->family == AF_INET6) { ++ memcpy(to->remote_ipv6, x->props.saddr.a6, ++ sizeof(to->remote_ipv6)); ++ } else { ++ to->remote_ipv4 = x->props.saddr.a4; ++ memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); ++ } + + return 0; ++err_clear: ++ memset(to, 0, size); ++ return -EINVAL; + } + +-const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { +- .func = bpf_skb_set_tunnel_key, ++static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { ++ .func = bpf_skb_get_xfrm_state, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_STACK, +- .arg3_type = ARG_CONST_STACK_SIZE, +- .arg4_type = ARG_ANYTHING, ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_PTR_TO_UNINIT_MEM, ++ .arg4_type = ARG_CONST_SIZE, ++ .arg5_type = ARG_ANYTHING, + }; ++#endif ++ + +-static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void) ++#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) ++static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, ++ bool ingress) + { +- if (!md_dst) { +- /* race is not possible, since it's called from +- * verifier that 
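[Editorial aside, not part of the patch] bpf_skb_event_output() above packs an optional number of packet bytes into the upper 32 bits of the flags word (the BPF_F_CTXLEN_MASK handling). A sketch of the corresponding program side, assuming a libbpf BTF-style map definition (older loaders would use struct bpf_map_def); the event layout and sample size are illustrative:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(int));
    __uint(value_size, sizeof(int));
} events SEC(".maps");

struct event_meta {
    __u32 pkt_len;
};

SEC("tc")
int sample_skb(struct __sk_buff *skb)
{
    struct event_meta meta = { .pkt_len = skb->len };
    __u32 ctx_len = skb->len < 64 ? skb->len : 64;
    /* Upper 32 bits of the flags ask the kernel to append that many
     * packet bytes to the event -- the BPF_F_CTXLEN_MASK path in
     * bpf_skb_event_output() above, which rejects requests beyond skb->len.
     */
    __u64 flags = BPF_F_CURRENT_CPU | ((__u64)ctx_len << 32);

    bpf_perf_event_output(skb, &events, flags, &meta, sizeof(meta));
    return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";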
is holding verifier mutex +- */ +- md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL); +- if (!md_dst) +- return NULL; ++ return bpf_lwt_push_ip_encap(skb, hdr, len, ingress); ++} ++#endif ++ ++BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, ++ u32, len) ++{ ++ switch (type) { ++#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) ++ case BPF_LWT_ENCAP_IP: ++ return bpf_push_ip_encap(skb, hdr, len, true /* ingress */); ++#endif ++ default: ++ return -EINVAL; + } +- return &bpf_skb_set_tunnel_key_proto; ++} ++ ++BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type, ++ void *, hdr, u32, len) ++{ ++ switch (type) { ++#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) ++ case BPF_LWT_ENCAP_IP: ++ return bpf_push_ip_encap(skb, hdr, len, false /* egress */); ++#endif ++ default: ++ return -EINVAL; ++ } ++} ++ ++static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { ++ .func = bpf_lwt_in_push_encap, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_CONST_SIZE ++}; ++ ++static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { ++ .func = bpf_lwt_xmit_push_encap, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_CONST_SIZE ++}; ++ ++bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, u32 *target_size) ++{ ++ return 0; ++} ++ ++BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) ++{ ++ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) ++ return (unsigned long)sk; ++ ++ return (unsigned long)NULL; ++} ++ ++const struct bpf_func_proto bpf_tcp_sock_proto = { ++ .func = bpf_tcp_sock, ++ .gpl_only = false, ++ .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, ++ .arg1_type = ARG_PTR_TO_SOCK_COMMON, ++}; ++ ++bool bpf_helper_changes_pkt_data(void *func) ++{ ++ if (func == bpf_skb_vlan_push || ++ func == bpf_skb_vlan_pop || ++ func == bpf_skb_store_bytes || ++ func == bpf_skb_change_head || ++ func == bpf_skb_change_tail || ++ func == bpf_skb_pull_data || ++ func == bpf_clone_redirect || ++ func == bpf_l3_csum_replace || ++ func == bpf_l4_csum_replace || ++ func == bpf_lwt_in_push_encap || ++ func == bpf_lwt_xmit_push_encap) ++ return true; ++ ++ return false; + } + + static const struct bpf_func_proto * +-sk_filter_func_proto(enum bpf_func_id func_id) ++bpf_base_func_proto(enum bpf_func_id func_id) + { + switch (func_id) { + case BPF_FUNC_map_lookup_elem: +@@ -1644,283 +2495,1168 @@ sk_filter_func_proto(enum bpf_func_id fu + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; ++ case BPF_FUNC_map_push_elem: ++ return &bpf_map_push_elem_proto; ++ case BPF_FUNC_map_pop_elem: ++ return &bpf_map_pop_elem_proto; ++ case BPF_FUNC_map_peek_elem: ++ return &bpf_map_peek_elem_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; + case BPF_FUNC_get_smp_processor_id: +- return &bpf_get_smp_processor_id_proto; ++ return &bpf_get_raw_smp_processor_id_proto; ++ case BPF_FUNC_get_numa_node_id: ++ return &bpf_get_numa_node_id_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; ++ 
default: ++ break; ++ } ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return NULL; ++ ++ switch (func_id) { ++ case BPF_FUNC_spin_lock: ++ return &bpf_spin_lock_proto; ++ case BPF_FUNC_spin_unlock: ++ return &bpf_spin_unlock_proto; + case BPF_FUNC_trace_printk: +- if (capable(CAP_SYS_ADMIN)) +- return bpf_get_trace_printk_proto(); ++ return bpf_get_trace_printk_proto(); + default: + return NULL; + } + } + + static const struct bpf_func_proto * +-tc_cls_act_func_proto(enum bpf_func_id func_id) ++sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ return bpf_base_func_proto(func_id); ++} ++ ++static const struct bpf_func_proto * ++sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ return bpf_base_func_proto(func_id); ++} ++ ++const struct bpf_func_proto bpf_sk_storage_get_proto __weak; ++const struct bpf_func_proto bpf_sk_storage_delete_proto __weak; ++ ++static const struct bpf_func_proto * ++tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) + { + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; ++ case BPF_FUNC_skb_load_bytes: ++ return &bpf_skb_load_bytes_proto; ++ case BPF_FUNC_skb_load_bytes_relative: ++ return &bpf_skb_load_bytes_relative_proto; ++ case BPF_FUNC_skb_pull_data: ++ return &bpf_skb_pull_data_proto; ++ case BPF_FUNC_csum_diff: ++ return &bpf_csum_diff_proto; ++ case BPF_FUNC_csum_update: ++ return &bpf_csum_update_proto; + case BPF_FUNC_l3_csum_replace: + return &bpf_l3_csum_replace_proto; + case BPF_FUNC_l4_csum_replace: + return &bpf_l4_csum_replace_proto; + case BPF_FUNC_clone_redirect: + return &bpf_clone_redirect_proto; +- case BPF_FUNC_get_cgroup_classid: +- return &bpf_get_cgroup_classid_proto; + case BPF_FUNC_skb_vlan_push: + return &bpf_skb_vlan_push_proto; + case BPF_FUNC_skb_vlan_pop: + return &bpf_skb_vlan_pop_proto; +- case BPF_FUNC_skb_get_tunnel_key: +- return &bpf_skb_get_tunnel_key_proto; +- case BPF_FUNC_skb_set_tunnel_key: +- return bpf_get_skb_set_tunnel_key_proto(); ++ case BPF_FUNC_skb_change_type: ++ return &bpf_skb_change_type_proto; ++ case BPF_FUNC_skb_change_tail: ++ return &bpf_skb_change_tail_proto; ++ case BPF_FUNC_redirect: ++ return &bpf_redirect_proto; ++ case BPF_FUNC_get_hash_recalc: ++ return &bpf_get_hash_recalc_proto; ++ case BPF_FUNC_set_hash_invalid: ++ return &bpf_set_hash_invalid_proto; ++ case BPF_FUNC_set_hash: ++ return &bpf_set_hash_proto; ++ case BPF_FUNC_perf_event_output: ++ return &bpf_skb_event_output_proto; ++ case BPF_FUNC_get_smp_processor_id: ++ return &bpf_get_smp_processor_id_proto; ++#ifdef CONFIG_XFRM ++ case BPF_FUNC_skb_get_xfrm_state: ++ return &bpf_skb_get_xfrm_state_proto; ++#endif ++ default: ++ return bpf_base_func_proto(func_id); ++ } ++} ++ ++static const struct bpf_func_proto * ++xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ switch (func_id) { ++ default: ++ return bpf_base_func_proto(func_id); ++ } ++} ++ ++const struct bpf_func_proto bpf_sock_map_update_proto __weak; ++const struct bpf_func_proto bpf_sock_hash_update_proto __weak; ++ ++static const struct bpf_func_proto * ++sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ switch (func_id) { ++ default: ++ return bpf_base_func_proto(func_id); ++ } ++} ++ ++const struct bpf_func_proto bpf_msg_redirect_map_proto __weak; ++const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak; ++ ++static const struct bpf_func_proto * ++sk_msg_func_proto(enum bpf_func_id func_id, 
const struct bpf_prog *prog) ++{ ++ switch (func_id) { ++ default: ++ return bpf_base_func_proto(func_id); ++ } ++} ++ ++const struct bpf_func_proto bpf_sk_redirect_map_proto __weak; ++const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak; ++ ++static const struct bpf_func_proto * ++sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ return bpf_base_func_proto(func_id); ++} ++ ++static const struct bpf_func_proto * ++flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ switch (func_id) { ++ default: ++ return bpf_base_func_proto(func_id); ++ } ++} ++ ++static const struct bpf_func_proto * ++lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ switch (func_id) { ++ case BPF_FUNC_skb_load_bytes: ++ return &bpf_skb_load_bytes_proto; ++ case BPF_FUNC_skb_pull_data: ++ return &bpf_skb_pull_data_proto; ++ case BPF_FUNC_csum_diff: ++ return &bpf_csum_diff_proto; ++ case BPF_FUNC_get_hash_recalc: ++ return &bpf_get_hash_recalc_proto; ++ case BPF_FUNC_perf_event_output: ++ return &bpf_skb_event_output_proto; ++ case BPF_FUNC_get_smp_processor_id: ++ return &bpf_get_smp_processor_id_proto; ++ default: ++ return bpf_base_func_proto(func_id); ++ } ++} ++ ++static const struct bpf_func_proto * ++lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ switch (func_id) { ++ case BPF_FUNC_lwt_push_encap: ++ return &bpf_lwt_in_push_encap_proto; ++ default: ++ return lwt_out_func_proto(func_id, prog); ++ } ++} ++ ++static const struct bpf_func_proto * ++lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++{ ++ switch (func_id) { + case BPF_FUNC_redirect: + return &bpf_redirect_proto; +- case BPF_FUNC_get_route_realm: +- return &bpf_get_route_realm_proto; ++ case BPF_FUNC_clone_redirect: ++ return &bpf_clone_redirect_proto; ++ case BPF_FUNC_skb_change_tail: ++ return &bpf_skb_change_tail_proto; ++ case BPF_FUNC_skb_change_head: ++ return &bpf_skb_change_head_proto; ++ case BPF_FUNC_skb_store_bytes: ++ return &bpf_skb_store_bytes_proto; ++ case BPF_FUNC_csum_update: ++ return &bpf_csum_update_proto; ++ case BPF_FUNC_l3_csum_replace: ++ return &bpf_l3_csum_replace_proto; ++ case BPF_FUNC_l4_csum_replace: ++ return &bpf_l4_csum_replace_proto; ++ case BPF_FUNC_set_hash_invalid: ++ return &bpf_set_hash_invalid_proto; ++ case BPF_FUNC_lwt_push_encap: ++ return &bpf_lwt_xmit_push_encap_proto; + default: +- return sk_filter_func_proto(func_id); ++ return lwt_out_func_proto(func_id, prog); + } + } + +-static bool __is_valid_access(int off, int size, enum bpf_access_type type) ++static const struct bpf_func_proto * ++lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) + { +- /* check bounds */ ++ switch (func_id) { ++ default: ++ return lwt_out_func_proto(func_id, prog); ++ } ++} ++ ++static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ const int size_default = sizeof(__u32); ++ + if (off < 0 || off >= sizeof(struct __sk_buff)) + return false; + +- /* disallow misaligned access */ ++ /* The verifier guarantees that size > 0. 
*/ + if (off % size != 0) + return false; + +- /* all __sk_buff fields are __u32 */ +- if (size != 4) ++ switch (off) { ++ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): ++ if (off + size > offsetofend(struct __sk_buff, cb[4])) ++ return false; ++ break; ++ case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]): ++ case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]): ++ case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): ++ case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): ++ case bpf_ctx_range(struct __sk_buff, data): ++ case bpf_ctx_range(struct __sk_buff, data_meta): ++ case bpf_ctx_range(struct __sk_buff, data_end): ++ if (size != size_default) ++ return false; ++ break; ++ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): + return false; ++ case bpf_ctx_range(struct __sk_buff, tstamp): ++ if (size != sizeof(__u64)) ++ return false; ++ break; ++ case offsetof(struct __sk_buff, sk): ++ if (type == BPF_WRITE || size != sizeof(__u64)) ++ return false; ++ info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; ++ break; ++ default: ++ /* Only narrow read access allowed for now. */ ++ if (type == BPF_WRITE) { ++ if (size != size_default) ++ return false; ++ } else { ++ bpf_ctx_record_field_size(info, size_default); ++ if (!bpf_ctx_narrow_access_ok(off, size, size_default)) ++ return false; ++ } ++ } + + return true; + } + + static bool sk_filter_is_valid_access(int off, int size, +- enum bpf_access_type type) ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) + { +- if (off == offsetof(struct __sk_buff, tc_classid)) ++ return false; ++} ++ ++static bool lwt_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ switch (off) { ++ case bpf_ctx_range(struct __sk_buff, tc_classid): ++ case bpf_ctx_range_till(struct __sk_buff, family, local_port): ++ case bpf_ctx_range(struct __sk_buff, data_meta): ++ case bpf_ctx_range(struct __sk_buff, tstamp): ++ case bpf_ctx_range(struct __sk_buff, wire_len): + return false; ++ } + + if (type == BPF_WRITE) { + switch (off) { +- case offsetof(struct __sk_buff, cb[0]) ... 
+- offsetof(struct __sk_buff, cb[4]): ++ case bpf_ctx_range(struct __sk_buff, mark): ++ case bpf_ctx_range(struct __sk_buff, priority): ++ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): + break; + default: + return false; + } + } + +- return __is_valid_access(off, size, type); ++ switch (off) { ++ case bpf_ctx_range(struct __sk_buff, data): ++ info->reg_type = PTR_TO_PACKET; ++ break; ++ case bpf_ctx_range(struct __sk_buff, data_end): ++ info->reg_type = PTR_TO_PACKET_END; ++ break; ++ } ++ ++ return bpf_skb_is_valid_access(off, size, type, prog, info); + } + +-static bool tc_cls_act_is_valid_access(int off, int size, +- enum bpf_access_type type) ++ ++bool bpf_sock_common_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ struct bpf_insn_access_aux *info) ++{ ++ switch (off) { ++ case bpf_ctx_range_till(struct bpf_sock, type, priority): ++ return false; ++ default: ++ return bpf_sock_is_valid_access(off, size, type, info); ++ } ++} ++ ++bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++static bool sock_filter_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write, ++ const struct bpf_prog *prog) ++{ ++ /* Neither direct read nor direct write requires any preliminary ++ * action. ++ */ ++ return 0; ++} ++ ++static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, ++ const struct bpf_prog *prog, int drop_verdict) ++{ ++ struct bpf_insn *insn = insn_buf; ++ ++ if (!direct_write) ++ return 0; ++ ++ /* if (!skb->cloned) ++ * goto start; ++ * ++ * (Fast-path, otherwise approximation that we might be ++ * a clone, do the rest in helper.) ++ */ ++ *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); ++ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); ++ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); ++ ++ /* ret = bpf_skb_pull_data(skb, 0); */ ++ *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); ++ *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); ++ *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, ++ BPF_FUNC_skb_pull_data); ++ /* if (!ret) ++ * goto restore; ++ * return TC_ACT_SHOT; ++ */ ++ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); ++ *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict); ++ *insn++ = BPF_EXIT_INSN(); ++ ++ /* restore: */ ++ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); ++ /* start: */ ++ *insn++ = prog->insnsi[0]; ++ ++ return insn - insn_buf; ++} ++ ++static int bpf_gen_ld_abs(const struct bpf_insn *orig, ++ struct bpf_insn *insn_buf) ++{ ++ bool indirect = BPF_MODE(orig->code) == BPF_IND; ++ struct bpf_insn *insn = insn_buf; ++ ++ if (!indirect) { ++ *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); ++ } else { ++ *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg); ++ if (orig->imm) ++ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); ++ } ++ /* We're guaranteed here that CTX is in R6. 
*/ ++ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); ++ ++ switch (BPF_SIZE(orig->code)) { ++ case BPF_B: ++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache); ++ break; ++ case BPF_H: ++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache); ++ break; ++ case BPF_W: ++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache); ++ break; ++ } ++ ++ *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2); ++ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); ++ *insn++ = BPF_EXIT_INSN(); ++ ++ return insn - insn_buf; ++} ++ ++static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, ++ const struct bpf_prog *prog) + { +- if (off == offsetof(struct __sk_buff, tc_classid)) +- return type == BPF_WRITE ? true : false; ++ return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT); ++} + ++static bool tc_cls_act_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ + if (type == BPF_WRITE) { + switch (off) { +- case offsetof(struct __sk_buff, mark): +- case offsetof(struct __sk_buff, tc_index): +- case offsetof(struct __sk_buff, priority): +- case offsetof(struct __sk_buff, cb[0]) ... +- offsetof(struct __sk_buff, cb[4]): ++ case bpf_ctx_range(struct __sk_buff, mark): ++ case bpf_ctx_range(struct __sk_buff, tc_index): ++ case bpf_ctx_range(struct __sk_buff, priority): ++ case bpf_ctx_range(struct __sk_buff, tc_classid): ++ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): ++ case bpf_ctx_range(struct __sk_buff, tstamp): ++ case bpf_ctx_range(struct __sk_buff, queue_mapping): + break; + default: + return false; + } + } +- return __is_valid_access(off, size, type); ++ ++ switch (off) { ++ case bpf_ctx_range(struct __sk_buff, data): ++ info->reg_type = PTR_TO_PACKET; ++ break; ++ case bpf_ctx_range(struct __sk_buff, data_meta): ++ info->reg_type = PTR_TO_PACKET_META; ++ break; ++ case bpf_ctx_range(struct __sk_buff, data_end): ++ info->reg_type = PTR_TO_PACKET_END; ++ break; ++ case bpf_ctx_range_till(struct __sk_buff, family, local_port): ++ return false; ++ } ++ ++ return bpf_skb_is_valid_access(off, size, type, prog, info); ++} ++ ++static bool xdp_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++void bpf_warn_invalid_xdp_action(u32 act) ++{ ++} ++EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); ++ ++static bool sock_addr_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++static bool sock_ops_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, ++ const struct bpf_prog *prog) ++{ ++ return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP); ++} ++ ++static bool sk_skb_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++static bool sk_msg_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct bpf_insn_access_aux *info) ++{ ++ return false; ++} ++ ++static bool flow_dissector_is_valid_access(int off, int size, ++ enum bpf_access_type type, ++ const struct bpf_prog *prog, ++ struct 
bpf_insn_access_aux *info) ++{ ++ return false; + } + +-static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, +- int src_reg, int ctx_off, +- struct bpf_insn *insn_buf, +- struct bpf_prog *prog) ++static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, ++ u32 *target_size) ++ ++{ ++ return 0; ++} ++ ++static u32 bpf_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, u32 *target_size) + { + struct bpf_insn *insn = insn_buf; ++ int off; + +- switch (ctx_off) { ++ switch (si->off) { + case offsetof(struct __sk_buff, len): +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); +- +- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, +- offsetof(struct sk_buff, len)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, len, 4, ++ target_size)); + break; + + case offsetof(struct __sk_buff, protocol): +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); +- +- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, +- offsetof(struct sk_buff, protocol)); ++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, protocol, 2, ++ target_size)); + break; + + case offsetof(struct __sk_buff, vlan_proto): +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); +- +- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, +- offsetof(struct sk_buff, vlan_proto)); ++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, vlan_proto, 2, ++ target_size)); + break; + + case offsetof(struct __sk_buff, priority): +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); +- + if (type == BPF_WRITE) +- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, +- offsetof(struct sk_buff, priority)); ++ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, priority, 4, ++ target_size)); + else +- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, +- offsetof(struct sk_buff, priority)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, priority, 4, ++ target_size)); + break; + + case offsetof(struct __sk_buff, ingress_ifindex): +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4); +- +- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, +- offsetof(struct sk_buff, skb_iif)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, skb_iif, 4, ++ target_size)); + break; + + case offsetof(struct __sk_buff, ifindex): +- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); +- +- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), +- dst_reg, src_reg, ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), ++ si->dst_reg, si->src_reg, + offsetof(struct sk_buff, dev)); +- *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1); +- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg, +- offsetof(struct net_device, ifindex)); ++ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, ++ bpf_target_off(struct net_device, ifindex, 4, ++ target_size)); + break; + + case offsetof(struct __sk_buff, hash): +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); +- +- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, +- offsetof(struct sk_buff, hash)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, hash, 
4, ++ target_size)); + break; + + case offsetof(struct __sk_buff, mark): +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); +- + if (type == BPF_WRITE) +- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, +- offsetof(struct sk_buff, mark)); ++ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, mark, 4, ++ target_size)); + else +- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, +- offsetof(struct sk_buff, mark)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, mark, 4, ++ target_size)); + break; + + case offsetof(struct __sk_buff, pkt_type): +- return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); ++ *target_size = 1; ++ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, ++ PKT_TYPE_OFFSET()); ++ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX); ++#ifdef __BIG_ENDIAN_BITFIELD ++ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); ++#endif ++ break; + + case offsetof(struct __sk_buff, queue_mapping): +- return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn); ++ if (type == BPF_WRITE) { ++ *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, USHRT_MAX, 1); ++ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, ++ queue_mapping, ++ 2, target_size)); ++ } else { ++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, ++ queue_mapping, ++ 2, target_size)); ++ } ++ break; + + case offsetof(struct __sk_buff, vlan_present): +- return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, +- dst_reg, src_reg, insn); ++ *target_size = 1; ++ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, ++ PKT_VLAN_PRESENT_OFFSET()); ++ if (PKT_VLAN_PRESENT_BIT) ++ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT); ++ if (PKT_VLAN_PRESENT_BIT < 7) ++ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1); ++ break; + + case offsetof(struct __sk_buff, vlan_tci): +- return convert_skb_access(SKF_AD_VLAN_TAG, +- dst_reg, src_reg, insn); ++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, vlan_tci, 2, ++ target_size)); ++ break; + + case offsetof(struct __sk_buff, cb[0]) ... 
+- offsetof(struct __sk_buff, cb[4]): ++ offsetofend(struct __sk_buff, cb[4]) - 1: + BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); ++ BUILD_BUG_ON((offsetof(struct sk_buff, cb) + ++ offsetof(struct qdisc_skb_cb, data)) % ++ sizeof(__u64)); + + prog->cb_access = 1; +- ctx_off -= offsetof(struct __sk_buff, cb[0]); +- ctx_off += offsetof(struct sk_buff, cb); +- ctx_off += offsetof(struct qdisc_skb_cb, data); ++ off = si->off; ++ off -= offsetof(struct __sk_buff, cb[0]); ++ off += offsetof(struct sk_buff, cb); ++ off += offsetof(struct qdisc_skb_cb, data); + if (type == BPF_WRITE) +- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off); ++ *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, ++ si->src_reg, off); + else +- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); ++ *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, ++ si->src_reg, off); + break; + + case offsetof(struct __sk_buff, tc_classid): +- ctx_off -= offsetof(struct __sk_buff, tc_classid); +- ctx_off += offsetof(struct sk_buff, cb); +- ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); +- WARN_ON(type != BPF_WRITE); +- *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); ++ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2); ++ ++ off = si->off; ++ off -= offsetof(struct __sk_buff, tc_classid); ++ off += offsetof(struct sk_buff, cb); ++ off += offsetof(struct qdisc_skb_cb, tc_classid); ++ *target_size = 2; ++ if (type == BPF_WRITE) ++ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, ++ si->src_reg, off); ++ else ++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, ++ si->src_reg, off); ++ break; ++ ++ case offsetof(struct __sk_buff, data): ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, data)); ++ break; ++ ++ case offsetof(struct __sk_buff, data_meta): ++ off = si->off; ++ off -= offsetof(struct __sk_buff, data_meta); ++ off += offsetof(struct sk_buff, cb); ++ off += offsetof(struct bpf_skb_data_end, data_meta); ++ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, ++ si->src_reg, off); ++ break; ++ ++ case offsetof(struct __sk_buff, data_end): ++ off = si->off; ++ off -= offsetof(struct __sk_buff, data_end); ++ off += offsetof(struct sk_buff, cb); ++ off += offsetof(struct bpf_skb_data_end, data_end); ++ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, ++ si->src_reg, off); + break; + + case offsetof(struct __sk_buff, tc_index): + #ifdef CONFIG_NET_SCHED +- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); +- + if (type == BPF_WRITE) +- *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, +- offsetof(struct sk_buff, tc_index)); ++ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, tc_index, 2, ++ target_size)); + else +- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, +- offsetof(struct sk_buff, tc_index)); +- break; ++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, tc_index, 2, ++ target_size)); + #else ++ *target_size = 2; + if (type == BPF_WRITE) +- *insn++ = BPF_MOV64_REG(dst_reg, dst_reg); ++ *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); + else +- *insn++ = BPF_MOV64_IMM(dst_reg, 0); ++ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); ++#endif ++ break; ++ ++ case offsetof(struct __sk_buff, napi_id): ++ *target_size = 4; ++ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); ++ break; ++ case offsetof(struct __sk_buff, family): ++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); ++ ++ *insn++ = 
BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, sk)); ++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, ++ bpf_target_off(struct sock_common, ++ skc_family, ++ 2, target_size)); ++ break; ++ case offsetof(struct __sk_buff, remote_ip4): ++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); ++ ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, sk)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, ++ bpf_target_off(struct sock_common, ++ skc_daddr, ++ 4, target_size)); ++ break; ++ case offsetof(struct __sk_buff, local_ip4): ++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, ++ skc_rcv_saddr) != 4); ++ ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, sk)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, ++ bpf_target_off(struct sock_common, ++ skc_rcv_saddr, ++ 4, target_size)); ++ break; ++ case offsetof(struct __sk_buff, remote_ip6[0]) ... ++ offsetof(struct __sk_buff, remote_ip6[3]): ++#if IS_ENABLED(CONFIG_IPV6) ++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, ++ skc_v6_daddr.s6_addr32[0]) != 4); ++ ++ off = si->off; ++ off -= offsetof(struct __sk_buff, remote_ip6[0]); ++ ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, sk)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, ++ offsetof(struct sock_common, ++ skc_v6_daddr.s6_addr32[0]) + ++ off); ++#else ++ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); ++#endif ++ break; ++ case offsetof(struct __sk_buff, local_ip6[0]) ... ++ offsetof(struct __sk_buff, local_ip6[3]): ++#if IS_ENABLED(CONFIG_IPV6) ++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, ++ skc_v6_rcv_saddr.s6_addr32[0]) != 4); ++ ++ off = si->off; ++ off -= offsetof(struct __sk_buff, local_ip6[0]); ++ ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, sk)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, ++ offsetof(struct sock_common, ++ skc_v6_rcv_saddr.s6_addr32[0]) + ++ off); ++#else ++ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); ++#endif ++ break; ++ ++ case offsetof(struct __sk_buff, remote_port): ++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); ++ ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, sk)); ++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, ++ bpf_target_off(struct sock_common, ++ skc_dport, ++ 2, target_size)); ++#ifndef __BIG_ENDIAN_BITFIELD ++ *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); ++#endif + break; ++ ++ case offsetof(struct __sk_buff, local_port): ++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); ++ ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, sk)); ++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, ++ bpf_target_off(struct sock_common, ++ skc_num, 2, target_size)); ++ break; ++ ++ case offsetof(struct __sk_buff, tstamp): ++ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); ++ ++ if (type == BPF_WRITE) ++ *insn++ = BPF_STX_MEM(BPF_DW, ++ si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, ++ tstamp, 8, ++ target_size)); ++ else ++ *insn++ = BPF_LDX_MEM(BPF_DW, ++ si->dst_reg, si->src_reg, ++ bpf_target_off(struct sk_buff, ++ tstamp, 8, ++ target_size)); ++ 
break; ++ ++ case offsetof(struct __sk_buff, gso_segs): ++ /* si->dst_reg = skb_shinfo(SKB); */ ++#ifdef NET_SKBUFF_DATA_USES_OFFSET ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), ++ BPF_REG_AX, si->src_reg, ++ offsetof(struct sk_buff, end)); ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, head)); ++ *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); ++#else ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, end)); + #endif ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), ++ si->dst_reg, si->dst_reg, ++ bpf_target_off(struct skb_shared_info, ++ gso_segs, 2, ++ target_size)); ++ break; ++ case offsetof(struct __sk_buff, wire_len): ++ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4); ++ ++ off = si->off; ++ off -= offsetof(struct __sk_buff, wire_len); ++ off += offsetof(struct sk_buff, cb); ++ off += offsetof(struct qdisc_skb_cb, pkt_len); ++ *target_size = 4; ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); ++ break; ++ ++ case offsetof(struct __sk_buff, sk): ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, sk)); ++ break; + } + + return insn - insn_buf; + } + +-static const struct bpf_verifier_ops sk_filter_ops = { +- .get_func_proto = sk_filter_func_proto, +- .is_valid_access = sk_filter_is_valid_access, +- .convert_ctx_access = bpf_net_convert_ctx_access, ++u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, u32 *target_size) ++{ ++ return 0; ++} ++ ++static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, u32 *target_size) ++{ ++ struct bpf_insn *insn = insn_buf; ++ ++ switch (si->off) { ++ case offsetof(struct __sk_buff, ifindex): ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), ++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, dev)); ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, ++ bpf_target_off(struct net_device, ifindex, 4, ++ target_size)); ++ break; ++ default: ++ return bpf_convert_ctx_access(type, si, insn_buf, prog, ++ target_size); ++ } ++ ++ return insn - insn_buf; ++} ++ ++static u32 xdp_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, u32 *target_size) ++{ ++ return 0; ++} ++ ++/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of ++ * context Structure, F is Field in context structure that contains a pointer ++ * to Nested Structure of type NS that has the field NF. ++ * ++ * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make ++ * sure that SIZE is not greater than actual size of S.F.NF. ++ * ++ * If offset OFF is provided, the load happens from that offset relative to ++ * offset of NF. 
++ */ ++#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \ ++ do { \ ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \ ++ si->src_reg, offsetof(S, F)); \ ++ *insn++ = BPF_LDX_MEM( \ ++ SIZE, si->dst_reg, si->dst_reg, \ ++ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ ++ target_size) \ ++ + OFF); \ ++ } while (0) ++ ++#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \ ++ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \ ++ BPF_FIELD_SIZEOF(NS, NF), 0) ++ ++/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to ++ * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation. ++ * ++ * In addition it uses Temporary Field TF (member of struct S) as the 3rd ++ * "register" since two registers available in convert_ctx_access are not ++ * enough: we can't override neither SRC, since it contains value to store, nor ++ * DST since it contains pointer to context that may be used by later ++ * instructions. But we need a temporary place to save pointer to nested ++ * structure whose field we want to store to. ++ */ ++#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \ ++ do { \ ++ int tmp_reg = BPF_REG_9; \ ++ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ ++ --tmp_reg; \ ++ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ ++ --tmp_reg; \ ++ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \ ++ offsetof(S, TF)); \ ++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ ++ si->dst_reg, offsetof(S, F)); \ ++ *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \ ++ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ ++ target_size) \ ++ + OFF); \ ++ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ ++ offsetof(S, TF)); \ ++ } while (0) ++ ++#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \ ++ TF) \ ++ do { \ ++ if (type == BPF_WRITE) { \ ++ SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \ ++ OFF, TF); \ ++ } else { \ ++ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \ ++ S, NS, F, NF, SIZE, OFF); \ ++ } \ ++ } while (0) ++ ++#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \ ++ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \ ++ S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF) ++ ++static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, u32 *target_size) ++{ ++ return 0; ++} ++ ++static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, ++ u32 *target_size) ++{ ++ return 0; ++} ++ ++static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, u32 *target_size) ++{ ++ return 0; ++} ++ ++static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, ++ const struct bpf_insn *si, ++ struct bpf_insn *insn_buf, ++ struct bpf_prog *prog, u32 *target_size) ++{ ++ return 0; ++} ++ ++const struct bpf_verifier_ops sk_filter_verifier_ops = { ++ .is_valid_access = sk_filter_is_valid_access, ++ .convert_ctx_access = bpf_convert_ctx_access, ++ .gen_ld_abs = bpf_gen_ld_abs, + }; + +-static const struct bpf_verifier_ops tc_cls_act_ops = { +- .get_func_proto = tc_cls_act_func_proto, +- .is_valid_access = tc_cls_act_is_valid_access, +- .convert_ctx_access = bpf_net_convert_ctx_access, ++const struct bpf_prog_ops sk_filter_prog_ops = { + }; + +-static struct bpf_prog_type_list sk_filter_type __read_mostly = { +- .ops 
= &sk_filter_ops, +- .type = BPF_PROG_TYPE_SOCKET_FILTER, ++const struct bpf_verifier_ops tc_cls_act_verifier_ops = { ++ .get_func_proto = tc_cls_act_func_proto, ++ .is_valid_access = tc_cls_act_is_valid_access, ++ .convert_ctx_access = tc_cls_act_convert_ctx_access, ++ .gen_prologue = tc_cls_act_prologue, ++ .gen_ld_abs = bpf_gen_ld_abs, + }; + +-static struct bpf_prog_type_list sched_cls_type __read_mostly = { +- .ops = &tc_cls_act_ops, +- .type = BPF_PROG_TYPE_SCHED_CLS, ++const struct bpf_prog_ops tc_cls_act_prog_ops = { + }; + +-static struct bpf_prog_type_list sched_act_type __read_mostly = { +- .ops = &tc_cls_act_ops, +- .type = BPF_PROG_TYPE_SCHED_ACT, ++const struct bpf_verifier_ops xdp_verifier_ops = { ++ .get_func_proto = xdp_func_proto, ++ .is_valid_access = xdp_is_valid_access, ++ .convert_ctx_access = xdp_convert_ctx_access, ++ .gen_prologue = bpf_noop_prologue, + }; + +-static int __init register_sk_filter_ops(void) +-{ +- bpf_register_prog_type(&sk_filter_type); +- bpf_register_prog_type(&sched_cls_type); +- bpf_register_prog_type(&sched_act_type); ++const struct bpf_verifier_ops lwt_in_verifier_ops = { ++ .get_func_proto = lwt_in_func_proto, ++ .is_valid_access = lwt_is_valid_access, ++ .convert_ctx_access = bpf_convert_ctx_access, ++}; + +- return 0; +-} +-late_initcall(register_sk_filter_ops); ++const struct bpf_prog_ops lwt_in_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops lwt_out_verifier_ops = { ++ .get_func_proto = lwt_out_func_proto, ++ .is_valid_access = lwt_is_valid_access, ++ .convert_ctx_access = bpf_convert_ctx_access, ++}; ++ ++const struct bpf_prog_ops lwt_out_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops lwt_xmit_verifier_ops = { ++ .get_func_proto = lwt_xmit_func_proto, ++ .is_valid_access = lwt_is_valid_access, ++ .convert_ctx_access = bpf_convert_ctx_access, ++ .gen_prologue = tc_cls_act_prologue, ++}; ++ ++const struct bpf_prog_ops lwt_xmit_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops lwt_seg6local_verifier_ops = { ++ .get_func_proto = lwt_seg6local_func_proto, ++ .is_valid_access = lwt_is_valid_access, ++ .convert_ctx_access = bpf_convert_ctx_access, ++}; ++ ++const struct bpf_prog_ops lwt_seg6local_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops cg_sock_verifier_ops = { ++ .get_func_proto = sock_filter_func_proto, ++ .is_valid_access = sock_filter_is_valid_access, ++ .convert_ctx_access = bpf_sock_convert_ctx_access, ++}; ++ ++const struct bpf_prog_ops cg_sock_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops cg_sock_addr_verifier_ops = { ++ .get_func_proto = sock_addr_func_proto, ++ .is_valid_access = sock_addr_is_valid_access, ++ .convert_ctx_access = sock_addr_convert_ctx_access, ++}; + +-int __sk_detach_filter(struct sock *sk, bool locked) ++const struct bpf_prog_ops cg_sock_addr_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops sock_ops_verifier_ops = { ++ .get_func_proto = sock_ops_func_proto, ++ .is_valid_access = sock_ops_is_valid_access, ++ .convert_ctx_access = sock_ops_convert_ctx_access, ++}; ++ ++const struct bpf_prog_ops sock_ops_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops sk_skb_verifier_ops = { ++ .get_func_proto = sk_skb_func_proto, ++ .is_valid_access = sk_skb_is_valid_access, ++ .convert_ctx_access = sk_skb_convert_ctx_access, ++ .gen_prologue = sk_skb_prologue, ++}; ++ ++const struct bpf_prog_ops sk_skb_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops sk_msg_verifier_ops = { ++ .get_func_proto = sk_msg_func_proto, ++ .is_valid_access = sk_msg_is_valid_access, ++ .convert_ctx_access = 
sk_msg_convert_ctx_access, ++ .gen_prologue = bpf_noop_prologue, ++}; ++ ++const struct bpf_prog_ops sk_msg_prog_ops = { ++}; ++ ++const struct bpf_verifier_ops flow_dissector_verifier_ops = { ++ .get_func_proto = flow_dissector_func_proto, ++ .is_valid_access = flow_dissector_is_valid_access, ++ .convert_ctx_access = flow_dissector_convert_ctx_access, ++}; ++ ++const struct bpf_prog_ops flow_dissector_prog_ops = { ++}; ++ ++int sk_detach_filter(struct sock *sk) + { + int ret = -ENOENT; + struct sk_filter *filter; +@@ -1928,7 +3664,8 @@ int __sk_detach_filter(struct sock *sk, + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + +- filter = rcu_dereference_protected(sk->sk_filter, locked); ++ filter = rcu_dereference_protected(sk->sk_filter, ++ lockdep_sock_is_held(sk)); + if (filter) { + RCU_INIT_POINTER(sk->sk_filter, NULL); + sk_filter_uncharge(sk, filter); +@@ -1937,12 +3674,7 @@ int __sk_detach_filter(struct sock *sk, + + return ret; + } +-EXPORT_SYMBOL_GPL(__sk_detach_filter); +- +-int sk_detach_filter(struct sock *sk) +-{ +- return __sk_detach_filter(sk, sock_owned_by_user(sk)); +-} ++EXPORT_SYMBOL_GPL(sk_detach_filter); + + int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, + unsigned int len) +@@ -1953,7 +3685,7 @@ int sk_get_filter(struct sock *sk, struc + + lock_sock(sk); + filter = rcu_dereference_protected(sk->sk_filter, +- sock_owned_by_user(sk)); ++ lockdep_sock_is_held(sk)); + if (!filter) + goto out; + +@@ -1987,3 +3719,5 @@ out: + release_sock(sk); + return ret; + } ++ ++ +--- a/include/asm-generic/barrier.h ++++ b/include/asm-generic/barrier.h +@@ -119,5 +119,29 @@ do { \ + ___p1; \ + }) + ++/** ++ * smp_cond_load_relaxed() - (Spin) wait for cond with no ordering guarantees ++ * @ptr: pointer to the variable to wait on ++ * @cond: boolean expression to wait for ++ * ++ * Equivalent to using READ_ONCE() on the condition variable. ++ * ++ * Due to C lacking lambda expressions we load the value of *ptr into a ++ * pre-named variable @VAL to be used in @cond. ++ */ ++#ifndef smp_cond_load_relaxed ++#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ ++ typeof(ptr) __PTR = (ptr); \ ++ typeof(*ptr) VAL; \ ++ for (;;) { \ ++ VAL = READ_ONCE(*__PTR); \ ++ if (cond_expr) \ ++ break; \ ++ cpu_relax(); \ ++ } \ ++ VAL; \ ++}) ++#endif ++ + #endif /* !__ASSEMBLY__ */ + #endif /* __ASM_GENERIC_BARRIER_H */ +--- a/arch/arm/include/asm/barrier.h ++++ b/arch/arm/include/asm/barrier.h +@@ -94,4 +94,6 @@ do { \ + #define smp_mb__after_atomic() smp_mb() + + #endif /* !__ASSEMBLY__ */ ++ ++#include + #endif /* __ASM_BARRIER_H */ +--- a/include/linux/list_nulls.h ++++ b/include/linux/list_nulls.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + #ifndef _LINUX_LIST_NULLS_H + #define _LINUX_LIST_NULLS_H + +@@ -29,6 +30,11 @@ struct hlist_nulls_node { + ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) + + #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) ++ ++#define hlist_nulls_entry_safe(ptr, type, member) \ ++ ({ typeof(ptr) ____ptr = (ptr); \ ++ !is_a_nulls(____ptr) ? 
hlist_nulls_entry(____ptr, type, member) : NULL; \ ++ }) + /** + * ptr_is_a_nulls - Test if a ptr is a nulls + * @ptr: ptr to be tested +@@ -57,7 +63,7 @@ static inline int hlist_nulls_unhashed(c + + static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) + { +- return is_a_nulls(h->first); ++ return is_a_nulls(READ_ONCE(h->first)); + } + + static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, +@@ -66,10 +72,10 @@ static inline void hlist_nulls_add_head( + struct hlist_nulls_node *first = h->first; + + n->next = first; +- n->pprev = &h->first; ++ WRITE_ONCE(n->pprev, &h->first); + h->first = n; + if (!is_a_nulls(first)) +- first->pprev = &n->next; ++ WRITE_ONCE(first->pprev, &n->next); + } + + static inline void __hlist_nulls_del(struct hlist_nulls_node *n) +@@ -79,13 +85,13 @@ static inline void __hlist_nulls_del(str + + WRITE_ONCE(*pprev, next); + if (!is_a_nulls(next)) +- next->pprev = pprev; ++ WRITE_ONCE(next->pprev, pprev); + } + + static inline void hlist_nulls_del(struct hlist_nulls_node *n) + { + __hlist_nulls_del(n); +- n->pprev = LIST_POISON2; ++ WRITE_ONCE(n->pprev, LIST_POISON2); + } + + /** +--- a/include/linux/rculist_nulls.h ++++ b/include/linux/rculist_nulls.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + #ifndef _LINUX_RCULIST_NULLS_H + #define _LINUX_RCULIST_NULLS_H + +@@ -33,7 +34,7 @@ static inline void hlist_nulls_del_init_ + { + if (!hlist_nulls_unhashed(n)) { + __hlist_nulls_del(n); +- n->pprev = NULL; ++ WRITE_ONCE(n->pprev, NULL); + } + } + +@@ -65,7 +66,7 @@ static inline void hlist_nulls_del_init_ + static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) + { + __hlist_nulls_del(n); +- n->pprev = LIST_POISON2; ++ WRITE_ONCE(n->pprev, LIST_POISON2); + } + + /** +@@ -93,11 +94,49 @@ static inline void hlist_nulls_add_head_ + struct hlist_nulls_node *first = h->first; + + n->next = first; +- n->pprev = &h->first; ++ WRITE_ONCE(n->pprev, &h->first); + rcu_assign_pointer(hlist_nulls_first_rcu(h), n); + if (!is_a_nulls(first)) +- first->pprev = &n->next; ++ WRITE_ONCE(first->pprev, &n->next); + } ++ ++/** ++ * hlist_nulls_add_tail_rcu ++ * @n: the element to add to the hash list. ++ * @h: the list to add to. ++ * ++ * Description: ++ * Adds the specified element to the specified hlist_nulls, ++ * while permitting racing traversals. ++ * ++ * The caller must take whatever precautions are necessary ++ * (such as holding appropriate locks) to avoid racing ++ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() ++ * or hlist_nulls_del_rcu(), running on this same list. ++ * However, it is perfectly legal to run concurrently with ++ * the _rcu list-traversal primitives, such as ++ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency ++ * problems on Alpha CPUs. Regardless of the type of CPU, the ++ * list-traversal primitive must be guarded by rcu_read_lock(). ++ */ ++static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, ++ struct hlist_nulls_head *h) ++{ ++ struct hlist_nulls_node *i, *last = NULL; ++ ++ /* Note: write side code, so rcu accessors are not needed. */ ++ for (i = h->first; !is_a_nulls(i); i = i->next) ++ last = i; ++ ++ if (last) { ++ n->next = last->next; ++ n->pprev = &last->next; ++ rcu_assign_pointer(hlist_next_rcu(last), n); ++ } else { ++ hlist_nulls_add_head_rcu(n, h); ++ } ++} ++ + /** + * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. 
+@@ -107,7 +146,7 @@ static inline void hlist_nulls_add_head_ + * + * The barrier() is needed to make sure compiler doesn't cache first element [1], + * as this loop can be restarted [2] +- * [1] Documentation/atomic_ops.txt around line 114 ++ * [1] Documentation/core-api/atomic_ops.rst around line 114 + * [2] Documentation/RCU/rculist_nulls.txt around line 146 + */ + #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ +@@ -117,5 +156,19 @@ static inline void hlist_nulls_add_head_ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) + ++/** ++ * hlist_nulls_for_each_entry_safe - ++ * iterate over list of given type safe against removal of list entry ++ * @tpos: the type * to use as a loop cursor. ++ * @pos: the &struct hlist_nulls_node to use as a loop cursor. ++ * @head: the head for your list. ++ * @member: the name of the hlist_nulls_node within the struct. ++ */ ++#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ ++ for (({barrier();}), \ ++ pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ ++ (!is_a_nulls(pos)) && \ ++ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \ ++ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });) + #endif + #endif +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -627,8 +627,9 @@ static int tun_attach(struct tun_struct + + /* Re-attach the filter to persist device */ + if (!skip_filter && (tun->filter_attached == true)) { +- err = __sk_attach_filter(&tun->fprog, tfile->socket.sk, +- lockdep_rtnl_is_held()); ++ lock_sock(tfile->socket.sk); ++ err = sk_attach_filter(&tun->fprog, tfile->socket.sk); ++ release_sock(tfile->socket.sk); + if (!err) + goto out; + } +@@ -1835,7 +1836,9 @@ static void tun_detach_filter(struct tun + + for (i = 0; i < n; i++) { + tfile = rtnl_dereference(tun->tfiles[i]); +- __sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held()); ++ lock_sock(tfile->socket.sk); ++ sk_detach_filter(tfile->socket.sk); ++ release_sock(tfile->socket.sk); + } + + tun->filter_attached = false; +@@ -1848,8 +1851,9 @@ static int tun_attach_filter(struct tun_ + + for (i = 0; i < tun->numqueues; i++) { + tfile = rtnl_dereference(tun->tfiles[i]); +- ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk, +- lockdep_rtnl_is_held()); ++ lock_sock(tfile->socket.sk); ++ ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); ++ release_sock(tfile->socket.sk); + if (ret) { + tun_detach_filter(tun, i); + return ret; +--- a/include/linux/list.h ++++ b/include/linux/list.h +@@ -1,3 +1,4 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ + #ifndef _LINUX_LIST_H + #define _LINUX_LIST_H + +@@ -24,31 +25,46 @@ + + static inline void INIT_LIST_HEAD(struct list_head *list) + { +- list->next = list; ++ WRITE_ONCE(list->next, list); + list->prev = list; + } + ++#ifdef CONFIG_DEBUG_LIST ++extern bool __list_add_valid(struct list_head *new, ++ struct list_head *prev, ++ struct list_head *next); ++extern bool __list_del_entry_valid(struct list_head *entry); ++#else ++static inline bool __list_add_valid(struct list_head *new, ++ struct list_head *prev, ++ struct list_head *next) ++{ ++ return true; ++} ++static inline bool __list_del_entry_valid(struct list_head *entry) ++{ ++ return true; ++} ++#endif ++ + /* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! 
+ */ +-#ifndef CONFIG_DEBUG_LIST + static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) + { ++ if (!__list_add_valid(new, prev, next)) ++ return; ++ + next->prev = new; + new->next = next; + new->prev = prev; +- prev->next = new; ++ WRITE_ONCE(prev->next, new); + } +-#else +-extern void __list_add(struct list_head *new, +- struct list_head *prev, +- struct list_head *next); +-#endif + + /** + * list_add - add a new entry +@@ -90,28 +106,40 @@ static inline void __list_del(struct lis + WRITE_ONCE(prev->next, next); + } + ++/* ++ * Delete a list entry and clear the 'prev' pointer. ++ * ++ * This is a special-purpose list clearing method used in the networking code ++ * for lists allocated as per-cpu, where we don't want to incur the extra ++ * WRITE_ONCE() overhead of a regular list_del_init(). The code that uses this ++ * needs to check the node 'prev' pointer instead of calling list_empty(). ++ */ ++static inline void __list_del_clearprev(struct list_head *entry) ++{ ++ __list_del(entry->prev, entry->next); ++ entry->prev = NULL; ++} ++ + /** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty() on entry does not return true after this, the entry is + * in an undefined state. + */ +-#ifndef CONFIG_DEBUG_LIST + static inline void __list_del_entry(struct list_head *entry) + { ++ if (!__list_del_entry_valid(entry)) ++ return; ++ + __list_del(entry->prev, entry->next); + } + + static inline void list_del(struct list_head *entry) + { +- __list_del(entry->prev, entry->next); ++ __list_del_entry(entry); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; + } +-#else +-extern void __list_del_entry(struct list_head *entry); +-extern void list_del(struct list_head *entry); +-#endif + + /** + * list_replace - replace old entry by new one +@@ -137,6 +165,23 @@ static inline void list_replace_init(str + } + + /** ++ * list_swap - replace entry1 with entry2 and re-add entry1 at entry2's position ++ * @entry1: the location to place entry2 ++ * @entry2: the location to place entry1 ++ */ ++static inline void list_swap(struct list_head *entry1, ++ struct list_head *entry2) ++{ ++ struct list_head *pos = entry2->prev; ++ ++ list_del(entry2); ++ list_replace(entry1, entry2); ++ if (pos == entry1) ++ pos = entry2; ++ list_add(entry1, pos); ++} ++ ++/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +@@ -170,6 +215,40 @@ static inline void list_move_tail(struct + } + + /** ++ * list_bulk_move_tail - move a subsection of a list to its tail ++ * @head: the head that will follow our entry ++ * @first: first entry to move ++ * @last: last entry to move, can be the same as first ++ * ++ * Move all entries between @first and including @last before @head. ++ * All three entries must belong to the same linked list. 
++ */ ++static inline void list_bulk_move_tail(struct list_head *head, ++ struct list_head *first, ++ struct list_head *last) ++{ ++ first->prev->next = last->next; ++ last->next->prev = first->prev; ++ ++ head->prev->next = first; ++ first->prev = head->prev; ++ ++ last->next = head; ++ head->prev = last; ++} ++ ++/** ++ * list_is_first -- tests whether @list is the first entry in list @head ++ * @list: the entry to test ++ * @head: the head of the list ++ */ ++static inline int list_is_first(const struct list_head *list, ++ const struct list_head *head) ++{ ++ return list->prev == head; ++} ++ ++/** + * list_is_last - tests whether @list is the last entry in list @head + * @list: the entry to test + * @head: the head of the list +@@ -186,7 +265,7 @@ static inline int list_is_last(const str + */ + static inline int list_empty(const struct list_head *head) + { +- return head->next == head; ++ return READ_ONCE(head->next) == head; + } + + /** +@@ -223,6 +302,24 @@ static inline void list_rotate_left(stru + } + + /** ++ * list_rotate_to_front() - Rotate list to specific item. ++ * @list: The desired new front of the list. ++ * @head: The head of the list. ++ * ++ * Rotates list so that @list becomes the new front of the list. ++ */ ++static inline void list_rotate_to_front(struct list_head *list, ++ struct list_head *head) ++{ ++ /* ++ * Deletes the list head from the list denoted by @head and ++ * places it as the tail of @list, this effectively rotates the ++ * list so that @list is at the front. ++ */ ++ list_move_tail(head, list); ++} ++ ++/** + * list_is_singular - tests whether a list has just one entry. + * @head: the list to test. + */ +@@ -271,6 +368,36 @@ static inline void list_cut_position(str + __list_cut_position(list, head, entry); + } + ++/** ++ * list_cut_before - cut a list into two, before given entry ++ * @list: a new list to add all removed entries ++ * @head: a list with entries ++ * @entry: an entry within head, could be the head itself ++ * ++ * This helper moves the initial part of @head, up to but ++ * excluding @entry, from @head to @list. You should pass ++ * in @entry an element you know is on @head. @list should ++ * be an empty list or a list you do not care about losing ++ * its data. ++ * If @entry == @head, all entries on @head are moved to ++ * @list. ++ */ ++static inline void list_cut_before(struct list_head *list, ++ struct list_head *head, ++ struct list_head *entry) ++{ ++ if (head->next == entry) { ++ INIT_LIST_HEAD(list); ++ return; ++ } ++ list->next = head->next; ++ list->next->prev = list; ++ list->prev = entry->prev; ++ list->prev->next = list; ++ head->next = entry; ++ entry->prev = head; ++} ++ + static inline void __list_splice(const struct list_head *list, + struct list_head *prev, + struct list_head *next) +@@ -381,8 +508,11 @@ static inline void list_splice_tail_init + * + * Note that if the list is empty, it returns NULL. + */ +-#define list_first_entry_or_null(ptr, type, member) \ +- (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL) ++#define list_first_entry_or_null(ptr, type, member) ({ \ ++ struct list_head *head__ = (ptr); \ ++ struct list_head *pos__ = READ_ONCE(head__->next); \ ++ pos__ != head__ ? 
list_entry(pos__, type, member) : NULL; \ ++}) + + /** + * list_next_entry - get the next element in list +@@ -511,6 +641,19 @@ static inline void list_splice_tail_init + pos = list_next_entry(pos, member)) + + /** ++ * list_for_each_entry_from_reverse - iterate backwards over list of given type ++ * from the current point ++ * @pos: the type * to use as a loop cursor. ++ * @head: the head for your list. ++ * @member: the name of the list_head within the struct. ++ * ++ * Iterate backwards over list of given type, continuing from current position. ++ */ ++#define list_for_each_entry_from_reverse(pos, head, member) \ ++ for (; &pos->member != (head); \ ++ pos = list_prev_entry(pos, member)) ++ ++/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage +@@ -608,7 +751,7 @@ static inline int hlist_unhashed(const s + + static inline int hlist_empty(const struct hlist_head *h) + { +- return !h->first; ++ return !READ_ONCE(h->first); + } + + static inline void __hlist_del(struct hlist_node *n) +@@ -642,7 +785,7 @@ static inline void hlist_add_head(struct + n->next = first; + if (first) + first->pprev = &n->next; +- h->first = n; ++ WRITE_ONCE(h->first, n); + n->pprev = &h->first; + } + +@@ -653,7 +796,7 @@ static inline void hlist_add_before(stru + n->pprev = next->pprev; + n->next = next; + next->pprev = &n->next; +- *(n->pprev) = n; ++ WRITE_ONCE(*(n->pprev), n); + } + + static inline void hlist_add_behind(struct hlist_node *n, +@@ -679,6 +822,16 @@ static inline bool hlist_fake(struct hli + } + + /* ++ * Check whether the node is the only node of the head without ++ * accessing head: ++ */ ++static inline bool ++hlist_is_singular_node(struct hlist_node *n, struct hlist_head *h) ++{ ++ return !n->next && n->pprev == &h->first; ++} ++ ++/* + * Move a list from one list head to another. Fixup the pprev + * reference of the first entry if it exists. + */ +--- /dev/null ++++ b/include/linux/ptr_ring.h +@@ -0,0 +1,673 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * Definitions for the 'struct ptr_ring' datastructure. ++ * ++ * Author: ++ * Michael S. Tsirkin ++ * ++ * Copyright (C) 2016 Red Hat, Inc. ++ * ++ * This is a limited-size FIFO maintaining pointers in FIFO order, with ++ * one CPU producing entries and another consuming entries from a FIFO. ++ * ++ * This implementation tries to minimize cache-contention when there is a ++ * single producer and a single consumer CPU. ++ */ ++ ++#ifndef _LINUX_PTR_RING_H ++#define _LINUX_PTR_RING_H 1 ++ ++#ifdef __KERNEL__ ++#include ++#include ++#include ++#include ++#include ++#include ++#endif ++ ++struct ptr_ring { ++ int producer ____cacheline_aligned_in_smp; ++ spinlock_t producer_lock; ++ int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ ++ int consumer_tail; /* next entry to invalidate */ ++ spinlock_t consumer_lock; ++ /* Shared consumer/producer data */ ++ /* Read-only by both the producer and the consumer */ ++ int size ____cacheline_aligned_in_smp; /* max entries in queue */ ++ int batch; /* number of entries to consume in a batch */ ++ void **queue; ++}; ++ ++/* Note: callers invoking this in a loop must use a compiler barrier, ++ * for example cpu_relax(). ++ * ++ * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock: ++ * see e.g. ptr_ring_full. 
++ */ ++static inline bool __ptr_ring_full(struct ptr_ring *r) ++{ ++ return r->queue[r->producer]; ++} ++ ++static inline bool ptr_ring_full(struct ptr_ring *r) ++{ ++ bool ret; ++ ++ spin_lock(&r->producer_lock); ++ ret = __ptr_ring_full(r); ++ spin_unlock(&r->producer_lock); ++ ++ return ret; ++} ++ ++static inline bool ptr_ring_full_irq(struct ptr_ring *r) ++{ ++ bool ret; ++ ++ spin_lock_irq(&r->producer_lock); ++ ret = __ptr_ring_full(r); ++ spin_unlock_irq(&r->producer_lock); ++ ++ return ret; ++} ++ ++static inline bool ptr_ring_full_any(struct ptr_ring *r) ++{ ++ unsigned long flags; ++ bool ret; ++ ++ spin_lock_irqsave(&r->producer_lock, flags); ++ ret = __ptr_ring_full(r); ++ spin_unlock_irqrestore(&r->producer_lock, flags); ++ ++ return ret; ++} ++ ++static inline bool ptr_ring_full_bh(struct ptr_ring *r) ++{ ++ bool ret; ++ ++ spin_lock_bh(&r->producer_lock); ++ ret = __ptr_ring_full(r); ++ spin_unlock_bh(&r->producer_lock); ++ ++ return ret; ++} ++ ++/* Note: callers invoking this in a loop must use a compiler barrier, ++ * for example cpu_relax(). Callers must hold producer_lock. ++ * Callers are responsible for making sure pointer that is being queued ++ * points to a valid data. ++ */ ++static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) ++{ ++ if (unlikely(!r->size) || r->queue[r->producer]) ++ return -ENOSPC; ++ ++ /* Make sure the pointer we are storing points to a valid data. */ ++ /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ ++ smp_wmb(); ++ ++ WRITE_ONCE(r->queue[r->producer++], ptr); ++ if (unlikely(r->producer >= r->size)) ++ r->producer = 0; ++ return 0; ++} ++ ++/* ++ * Note: resize (below) nests producer lock within consumer lock, so if you ++ * consume in interrupt or BH context, you must disable interrupts/BH when ++ * calling this. ++ */ ++static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr) ++{ ++ int ret; ++ ++ spin_lock(&r->producer_lock); ++ ret = __ptr_ring_produce(r, ptr); ++ spin_unlock(&r->producer_lock); ++ ++ return ret; ++} ++ ++static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr) ++{ ++ int ret; ++ ++ spin_lock_irq(&r->producer_lock); ++ ret = __ptr_ring_produce(r, ptr); ++ spin_unlock_irq(&r->producer_lock); ++ ++ return ret; ++} ++ ++static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr) ++{ ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&r->producer_lock, flags); ++ ret = __ptr_ring_produce(r, ptr); ++ spin_unlock_irqrestore(&r->producer_lock, flags); ++ ++ return ret; ++} ++ ++static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) ++{ ++ int ret; ++ ++ spin_lock_bh(&r->producer_lock); ++ ret = __ptr_ring_produce(r, ptr); ++ spin_unlock_bh(&r->producer_lock); ++ ++ return ret; ++} ++ ++static inline void *__ptr_ring_peek(struct ptr_ring *r) ++{ ++ if (likely(r->size)) ++ return READ_ONCE(r->queue[r->consumer_head]); ++ return NULL; ++} ++ ++/* ++ * Test ring empty status without taking any locks. ++ * ++ * NB: This is only safe to call if ring is never resized. ++ * ++ * However, if some other CPU consumes ring entries at the same time, the value ++ * returned is not guaranteed to be correct. 
++ * ++ * In this case - to avoid incorrectly detecting the ring ++ * as empty - the CPU consuming the ring entries is responsible ++ * for either consuming all ring entries until the ring is empty, ++ * or synchronizing with some other CPU and causing it to ++ * re-test __ptr_ring_empty and/or consume the ring enteries ++ * after the synchronization point. ++ * ++ * Note: callers invoking this in a loop must use a compiler barrier, ++ * for example cpu_relax(). ++ */ ++static inline bool __ptr_ring_empty(struct ptr_ring *r) ++{ ++ if (likely(r->size)) ++ return !r->queue[READ_ONCE(r->consumer_head)]; ++ return true; ++} ++ ++static inline bool ptr_ring_empty(struct ptr_ring *r) ++{ ++ bool ret; ++ ++ spin_lock(&r->consumer_lock); ++ ret = __ptr_ring_empty(r); ++ spin_unlock(&r->consumer_lock); ++ ++ return ret; ++} ++ ++static inline bool ptr_ring_empty_irq(struct ptr_ring *r) ++{ ++ bool ret; ++ ++ spin_lock_irq(&r->consumer_lock); ++ ret = __ptr_ring_empty(r); ++ spin_unlock_irq(&r->consumer_lock); ++ ++ return ret; ++} ++ ++static inline bool ptr_ring_empty_any(struct ptr_ring *r) ++{ ++ unsigned long flags; ++ bool ret; ++ ++ spin_lock_irqsave(&r->consumer_lock, flags); ++ ret = __ptr_ring_empty(r); ++ spin_unlock_irqrestore(&r->consumer_lock, flags); ++ ++ return ret; ++} ++ ++static inline bool ptr_ring_empty_bh(struct ptr_ring *r) ++{ ++ bool ret; ++ ++ spin_lock_bh(&r->consumer_lock); ++ ret = __ptr_ring_empty(r); ++ spin_unlock_bh(&r->consumer_lock); ++ ++ return ret; ++} ++ ++/* Must only be called after __ptr_ring_peek returned !NULL */ ++static inline void __ptr_ring_discard_one(struct ptr_ring *r) ++{ ++ /* Fundamentally, what we want to do is update consumer ++ * index and zero out the entry so producer can reuse it. ++ * Doing it naively at each consume would be as simple as: ++ * consumer = r->consumer; ++ * r->queue[consumer++] = NULL; ++ * if (unlikely(consumer >= r->size)) ++ * consumer = 0; ++ * r->consumer = consumer; ++ * but that is suboptimal when the ring is full as producer is writing ++ * out new entries in the same cache line. Defer these updates until a ++ * batch of entries has been consumed. ++ */ ++ /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty ++ * to work correctly. ++ */ ++ int consumer_head = r->consumer_head; ++ int head = consumer_head++; ++ ++ /* Once we have processed enough entries invalidate them in ++ * the ring all at once so producer can reuse their space in the ring. ++ * We also do this when we reach end of the ring - not mandatory ++ * but helps keep the implementation simple. ++ */ ++ if (unlikely(consumer_head - r->consumer_tail >= r->batch || ++ consumer_head >= r->size)) { ++ /* Zero out entries in the reverse order: this way we touch the ++ * cache line that producer might currently be reading the last; ++ * producer won't make progress and touch other cache lines ++ * besides the first one until we write out all entries. ++ */ ++ while (likely(head >= r->consumer_tail)) ++ r->queue[head--] = NULL; ++ r->consumer_tail = consumer_head; ++ } ++ if (unlikely(consumer_head >= r->size)) { ++ consumer_head = 0; ++ r->consumer_tail = 0; ++ } ++ /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ ++ WRITE_ONCE(r->consumer_head, consumer_head); ++} ++ ++static inline void *__ptr_ring_consume(struct ptr_ring *r) ++{ ++ void *ptr; ++ ++ /* The READ_ONCE in __ptr_ring_peek guarantees that anyone ++ * accessing data through the pointer is up to date. Pairs ++ * with smp_wmb in __ptr_ring_produce. 
++ */ ++ ptr = __ptr_ring_peek(r); ++ if (ptr) ++ __ptr_ring_discard_one(r); ++ ++ return ptr; ++} ++ ++static inline int __ptr_ring_consume_batched(struct ptr_ring *r, ++ void **array, int n) ++{ ++ void *ptr; ++ int i; ++ ++ for (i = 0; i < n; i++) { ++ ptr = __ptr_ring_consume(r); ++ if (!ptr) ++ break; ++ array[i] = ptr; ++ } ++ ++ return i; ++} ++ ++/* ++ * Note: resize (below) nests producer lock within consumer lock, so if you ++ * call this in interrupt or BH context, you must disable interrupts/BH when ++ * producing. ++ */ ++static inline void *ptr_ring_consume(struct ptr_ring *r) ++{ ++ void *ptr; ++ ++ spin_lock(&r->consumer_lock); ++ ptr = __ptr_ring_consume(r); ++ spin_unlock(&r->consumer_lock); ++ ++ return ptr; ++} ++ ++static inline void *ptr_ring_consume_irq(struct ptr_ring *r) ++{ ++ void *ptr; ++ ++ spin_lock_irq(&r->consumer_lock); ++ ptr = __ptr_ring_consume(r); ++ spin_unlock_irq(&r->consumer_lock); ++ ++ return ptr; ++} ++ ++static inline void *ptr_ring_consume_any(struct ptr_ring *r) ++{ ++ unsigned long flags; ++ void *ptr; ++ ++ spin_lock_irqsave(&r->consumer_lock, flags); ++ ptr = __ptr_ring_consume(r); ++ spin_unlock_irqrestore(&r->consumer_lock, flags); ++ ++ return ptr; ++} ++ ++static inline void *ptr_ring_consume_bh(struct ptr_ring *r) ++{ ++ void *ptr; ++ ++ spin_lock_bh(&r->consumer_lock); ++ ptr = __ptr_ring_consume(r); ++ spin_unlock_bh(&r->consumer_lock); ++ ++ return ptr; ++} ++ ++static inline int ptr_ring_consume_batched(struct ptr_ring *r, ++ void **array, int n) ++{ ++ int ret; ++ ++ spin_lock(&r->consumer_lock); ++ ret = __ptr_ring_consume_batched(r, array, n); ++ spin_unlock(&r->consumer_lock); ++ ++ return ret; ++} ++ ++static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r, ++ void **array, int n) ++{ ++ int ret; ++ ++ spin_lock_irq(&r->consumer_lock); ++ ret = __ptr_ring_consume_batched(r, array, n); ++ spin_unlock_irq(&r->consumer_lock); ++ ++ return ret; ++} ++ ++static inline int ptr_ring_consume_batched_any(struct ptr_ring *r, ++ void **array, int n) ++{ ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&r->consumer_lock, flags); ++ ret = __ptr_ring_consume_batched(r, array, n); ++ spin_unlock_irqrestore(&r->consumer_lock, flags); ++ ++ return ret; ++} ++ ++static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, ++ void **array, int n) ++{ ++ int ret; ++ ++ spin_lock_bh(&r->consumer_lock); ++ ret = __ptr_ring_consume_batched(r, array, n); ++ spin_unlock_bh(&r->consumer_lock); ++ ++ return ret; ++} ++ ++/* Cast to structure type and call a function without discarding from FIFO. ++ * Function must return a value. ++ * Callers must take consumer_lock. 
++ */ ++#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r))) ++ ++#define PTR_RING_PEEK_CALL(r, f) ({ \ ++ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ ++ \ ++ spin_lock(&(r)->consumer_lock); \ ++ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ ++ spin_unlock(&(r)->consumer_lock); \ ++ __PTR_RING_PEEK_CALL_v; \ ++}) ++ ++#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \ ++ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ ++ \ ++ spin_lock_irq(&(r)->consumer_lock); \ ++ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ ++ spin_unlock_irq(&(r)->consumer_lock); \ ++ __PTR_RING_PEEK_CALL_v; \ ++}) ++ ++#define PTR_RING_PEEK_CALL_BH(r, f) ({ \ ++ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ ++ \ ++ spin_lock_bh(&(r)->consumer_lock); \ ++ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ ++ spin_unlock_bh(&(r)->consumer_lock); \ ++ __PTR_RING_PEEK_CALL_v; \ ++}) ++ ++#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \ ++ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ ++ unsigned long __PTR_RING_PEEK_CALL_f;\ ++ \ ++ spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ ++ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ ++ spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ ++ __PTR_RING_PEEK_CALL_v; \ ++}) ++ ++/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See ++ * documentation for vmalloc for which of them are legal. ++ */ ++static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) ++{ ++ if (size > KMALLOC_MAX_SIZE / sizeof(void *)) ++ return NULL; ++ return kmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); ++} ++ ++static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) ++{ ++ r->size = size; ++ r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue)); ++ /* We need to set batch at least to 1 to make logic ++ * in __ptr_ring_discard_one work correctly. ++ * Batching too much (because ring is small) would cause a lot of ++ * burstiness. Needs tuning, for now disable batching. ++ */ ++ if (r->batch > r->size / 2 || !r->batch) ++ r->batch = 1; ++} ++ ++static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) ++{ ++ r->queue = __ptr_ring_init_queue_alloc(size, gfp); ++ if (!r->queue) ++ return -ENOMEM; ++ ++ __ptr_ring_set_size(r, size); ++ r->producer = r->consumer_head = r->consumer_tail = 0; ++ spin_lock_init(&r->producer_lock); ++ spin_lock_init(&r->consumer_lock); ++ ++ return 0; ++} ++ ++/* ++ * Return entries into ring. Destroy entries that don't fit. ++ * ++ * Note: this is expected to be a rare slow path operation. ++ * ++ * Note: producer lock is nested within consumer lock, so if you ++ * resize you must make sure all uses nest correctly. ++ * In particular if you consume ring in interrupt or BH context, you must ++ * disable interrupts/BH when doing so. ++ */ ++static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, ++ void (*destroy)(void *)) ++{ ++ unsigned long flags; ++ int head; ++ ++ spin_lock_irqsave(&r->consumer_lock, flags); ++ spin_lock(&r->producer_lock); ++ ++ if (!r->size) ++ goto done; ++ ++ /* ++ * Clean out buffered entries (for simplicity). This way following code ++ * can test entries for NULL and if not assume they are valid. ++ */ ++ head = r->consumer_head - 1; ++ while (likely(head >= r->consumer_tail)) ++ r->queue[head--] = NULL; ++ r->consumer_tail = r->consumer_head; ++ ++ /* ++ * Go over entries in batch, start moving head back and copy entries. ++ * Stop when we run into previously unconsumed entries. 
++ */ ++ while (n) { ++ head = r->consumer_head - 1; ++ if (head < 0) ++ head = r->size - 1; ++ if (r->queue[head]) { ++ /* This batch entry will have to be destroyed. */ ++ goto done; ++ } ++ r->queue[head] = batch[--n]; ++ r->consumer_tail = head; ++ /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ ++ WRITE_ONCE(r->consumer_head, head); ++ } ++ ++done: ++ /* Destroy all entries left in the batch. */ ++ while (n) ++ destroy(batch[--n]); ++ spin_unlock(&r->producer_lock); ++ spin_unlock_irqrestore(&r->consumer_lock, flags); ++} ++ ++static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, ++ int size, gfp_t gfp, ++ void (*destroy)(void *)) ++{ ++ int producer = 0; ++ void **old; ++ void *ptr; ++ ++ while ((ptr = __ptr_ring_consume(r))) ++ if (producer < size) ++ queue[producer++] = ptr; ++ else if (destroy) ++ destroy(ptr); ++ ++ if (producer >= size) ++ producer = 0; ++ __ptr_ring_set_size(r, size); ++ r->producer = producer; ++ r->consumer_head = 0; ++ r->consumer_tail = 0; ++ old = r->queue; ++ r->queue = queue; ++ ++ return old; ++} ++ ++/* ++ * Note: producer lock is nested within consumer lock, so if you ++ * resize you must make sure all uses nest correctly. ++ * In particular if you consume ring in interrupt or BH context, you must ++ * disable interrupts/BH when doing so. ++ */ ++static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, ++ void (*destroy)(void *)) ++{ ++ unsigned long flags; ++ void **queue = __ptr_ring_init_queue_alloc(size, gfp); ++ void **old; ++ ++ if (!queue) ++ return -ENOMEM; ++ ++ spin_lock_irqsave(&(r)->consumer_lock, flags); ++ spin_lock(&(r)->producer_lock); ++ ++ old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); ++ ++ spin_unlock(&(r)->producer_lock); ++ spin_unlock_irqrestore(&(r)->consumer_lock, flags); ++ ++ kvfree(old); ++ ++ return 0; ++} ++ ++/* ++ * Note: producer lock is nested within consumer lock, so if you ++ * resize you must make sure all uses nest correctly. ++ * In particular if you consume ring in interrupt or BH context, you must ++ * disable interrupts/BH when doing so. ++ */ ++static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, ++ unsigned int nrings, ++ int size, ++ gfp_t gfp, void (*destroy)(void *)) ++{ ++ unsigned long flags; ++ void ***queues; ++ int i; ++ ++ queues = kmalloc_array(nrings, sizeof(*queues), gfp); ++ if (!queues) ++ goto noqueues; ++ ++ for (i = 0; i < nrings; ++i) { ++ queues[i] = __ptr_ring_init_queue_alloc(size, gfp); ++ if (!queues[i]) ++ goto nomem; ++ } ++ ++ for (i = 0; i < nrings; ++i) { ++ spin_lock_irqsave(&(rings[i])->consumer_lock, flags); ++ spin_lock(&(rings[i])->producer_lock); ++ queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], ++ size, gfp, destroy); ++ spin_unlock(&(rings[i])->producer_lock); ++ spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags); ++ } ++ ++ for (i = 0; i < nrings; ++i) ++ kvfree(queues[i]); ++ ++ kfree(queues); ++ ++ return 0; ++ ++nomem: ++ while (--i >= 0) ++ kvfree(queues[i]); ++ ++ kfree(queues); ++ ++noqueues: ++ return -ENOMEM; ++} ++ ++static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) ++{ ++ void *ptr; ++ ++ if (destroy) ++ while ((ptr = ptr_ring_consume(r))) ++ destroy(ptr); ++ kvfree(r->queue); ++} ++ ++#endif /* _LINUX_PTR_RING_H */ +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + + /* A. Checksumming of received packets by device. 
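
(Illustrative sketch, not part of the backport itself: a minimal consumer of the ptr_ring.h header introduced above. It assumes a kernel-module build context; the demo_fifo name and helpers are hypothetical, and only calls defined in the new header are used.)

#include <linux/ptr_ring.h>
#include <linux/slab.h>

static struct ptr_ring demo_fifo;

static int demo_fifo_setup(void)
{
	/* 256 slots; producer and consumer serialize on their own spinlocks. */
	return ptr_ring_init(&demo_fifo, 256, GFP_KERNEL);
}

static int demo_fifo_push(void *item)
{
	/* Fails with -ENOSPC while the slot at the producer index is occupied. */
	return ptr_ring_produce(&demo_fifo, item);
}

static void *demo_fifo_pop(void)
{
	/* Returns NULL when the ring is empty. */
	return ptr_ring_consume(&demo_fifo);
}

static void demo_fifo_destroy_item(void *item)
{
	kfree(item);
}

static void demo_fifo_teardown(void)
{
	/* Frees the slot array and destroys anything still queued. */
	ptr_ring_cleanup(&demo_fifo, demo_fifo_destroy_item);
}
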
+@@ -592,13 +593,23 @@ struct sk_buff { + */ + kmemcheck_bitfield_begin(flags1); + __u16 queue_mapping; ++ ++/* if you move cloned around you also must adapt those constants */ ++#ifdef __BIG_ENDIAN_BITFIELD ++#define CLONED_MASK (1 << 7) ++#else ++#define CLONED_MASK 1 ++#endif ++#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) ++ ++ __u8 __cloned_offset[0]; + __u8 cloned:1, + nohdr:1, + fclone:2, + peeked:1, + head_frag:1, +- xmit_more:1; +- /* one bit hole */ ++ xmit_more:1, ++ __unused:1; /* one bit hole */ + kmemcheck_bitfield_end(flags1); + + /* fields enclosed in headers_start/headers_end are copied +@@ -639,6 +650,14 @@ struct sk_buff { + __u8 csum_level:2; + __u8 csum_bad:1; + ++#ifdef __BIG_ENDIAN_BITFIELD ++#define PKT_VLAN_PRESENT_BIT 7 ++#else ++#define PKT_VLAN_PRESENT_BIT 0 ++#endif ++#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) ++ __u8 __pkt_vlan_present_offset[0]; ++ __u8 vlan_present:1; + #ifdef CONFIG_IPV6_NDISC_NODETYPE + __u8 ndisc_nodetype:2; + #endif +@@ -647,7 +666,7 @@ struct sk_buff { + __u8 remcsum_offload:1; + __u8 gro_skip:1; + __u8 fast_forwarded:1; +- /* 1 or 3 bit hole */ ++ /* 0 or 2 bit hole */ + + #ifdef CONFIG_NET_SCHED + __u16 tc_index; /* traffic control index */ +@@ -805,6 +824,15 @@ static inline struct rtable *skb_rtable( + return (struct rtable *)skb_dst(skb); + } + ++/* For mangling skb->pkt_type from user space side from applications ++ * such as nft, tc, etc, we only allow a conservative subset of ++ * possible pkt_types to be set. ++*/ ++static inline bool skb_pkt_type_ok(u32 ptype) ++{ ++ return ptype <= PACKET_OTHERHOST; ++} ++ + void kfree_skb(struct sk_buff *skb); + void kfree_skb_list(struct sk_buff *segs); + void skb_tx_error(struct sk_buff *skb); +@@ -2127,6 +2155,11 @@ static inline unsigned char *skb_mac_hea + return skb->head + skb->mac_header; + } + ++static inline u32 skb_mac_header_len(const struct sk_buff *skb) ++{ ++ return skb->network_header - skb->mac_header; ++} ++ + static inline int skb_mac_header_was_set(const struct sk_buff *skb) + { + return skb->mac_header != (typeof(skb->mac_header))~0U; +@@ -2256,7 +2289,7 @@ static inline int pskb_network_may_pull( + + int ___pskb_trim(struct sk_buff *skb, unsigned int len); + +-static inline void __skb_trim(struct sk_buff *skb, unsigned int len) ++static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) + { + if (unlikely(skb_is_nonlinear(skb))) { + WARN_ON(1); +@@ -2266,6 +2299,11 @@ static inline void __skb_trim(struct sk_ + skb_set_tail_pointer(skb, len); + } + ++static inline void __skb_trim(struct sk_buff *skb, unsigned int len) ++{ ++ __skb_set_length(skb, len); ++} ++ + void skb_trim(struct sk_buff *skb, unsigned int len); + + static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) +@@ -2318,6 +2356,20 @@ static inline struct sk_buff *skb_reduce + return skb; + } + ++static inline int __skb_grow(struct sk_buff *skb, unsigned int len) ++{ ++ unsigned int diff = len - skb->len; ++ ++ if (skb_tailroom(skb) < diff) { ++ int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb), ++ GFP_ATOMIC); ++ if (ret) ++ return ret; ++ } ++ __skb_set_length(skb, len); ++ return 0; ++} ++ + /** + * skb_orphan - orphan a buffer + * @skb: buffer to orphan +@@ -2818,6 +2870,18 @@ static inline int skb_linearize_cow(stru + __skb_linearize(skb) : 0; + } + ++static __always_inline void ++__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, ++ unsigned int off) ++{ ++ if 
(skb->ip_summed == CHECKSUM_COMPLETE) ++ skb->csum = csum_block_sub(skb->csum, ++ csum_partial(start, len, 0), off); ++ else if (skb->ip_summed == CHECKSUM_PARTIAL && ++ skb_checksum_start_offset(skb) < 0) ++ skb->ip_summed = CHECKSUM_NONE; ++} ++ + /** + * skb_postpull_rcsum - update checksum for received skb after pull + * @skb: buffer to update +@@ -2828,36 +2892,38 @@ static inline int skb_linearize_cow(stru + * update the CHECKSUM_COMPLETE checksum, or set ip_summed to + * CHECKSUM_NONE so that it can be recomputed from scratch. + */ +- + static inline void skb_postpull_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) + { +- if (skb->ip_summed == CHECKSUM_COMPLETE) +- skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); +- else if (skb->ip_summed == CHECKSUM_PARTIAL && +- skb_checksum_start_offset(skb) < 0) +- skb->ip_summed = CHECKSUM_NONE; ++ __skb_postpull_rcsum(skb, start, len, 0); + } + +-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); ++static __always_inline void ++__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len, ++ unsigned int off) ++{ ++ if (skb->ip_summed == CHECKSUM_COMPLETE) ++ skb->csum = csum_block_add(skb->csum, ++ csum_partial(start, len, 0), off); ++} + ++/** ++ * skb_postpush_rcsum - update checksum for received skb after push ++ * @skb: buffer to update ++ * @start: start of data after push ++ * @len: length of data pushed ++ * ++ * After doing a push on a received packet, you need to call this to ++ * update the CHECKSUM_COMPLETE checksum. ++ */ + static inline void skb_postpush_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) + { +- /* For performing the reverse operation to skb_postpull_rcsum(), +- * we can instead of ... +- * +- * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); +- * +- * ... just use this equivalent version here to save a few +- * instructions. Feeding csum of 0 in csum_partial() and later +- * on adding skb->csum is equivalent to feed skb->csum in the +- * first place. 
+- */ +- if (skb->ip_summed == CHECKSUM_COMPLETE) +- skb->csum = csum_partial(start, len, skb->csum); ++ __skb_postpush_rcsum(skb, start, len, 0); + } + ++unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); ++ + /** + * skb_push_rcsum - push skb and update receive checksum + * @skb: buffer to update +@@ -2901,6 +2967,21 @@ static inline int pskb_trim_rcsum(struct + #define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) + #define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) + ++static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) ++{ ++ if (skb->ip_summed == CHECKSUM_COMPLETE) ++ skb->ip_summed = CHECKSUM_NONE; ++ __skb_trim(skb, len); ++ return 0; ++} ++ ++static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) ++{ ++ if (skb->ip_summed == CHECKSUM_COMPLETE) ++ skb->ip_summed = CHECKSUM_NONE; ++ return __skb_grow(skb, len); ++} ++ + #define skb_queue_walk(queue, skb) \ + for (skb = (queue)->next; \ + skb != (struct sk_buff *)(queue); \ +@@ -3662,6 +3743,13 @@ static inline bool skb_is_gso_v6(const s + return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; + } + ++static inline void skb_gso_reset(struct sk_buff *skb) ++{ ++ skb_shinfo(skb)->gso_size = 0; ++ skb_shinfo(skb)->gso_segs = 0; ++ skb_shinfo(skb)->gso_type = 0; ++} ++ + void __skb_warn_lro_forwarding(const struct sk_buff *skb); + + static inline bool skb_warn_if_lro(const struct sk_buff *skb) +--- a/include/linux/if_arp.h ++++ b/include/linux/if_arp.h +@@ -44,4 +44,21 @@ static inline int arp_hdr_len(struct net + return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; + } + } ++ ++static inline bool dev_is_mac_header_xmit(const struct net_device *dev) ++{ ++ switch (dev->type) { ++ case ARPHRD_TUNNEL: ++ case ARPHRD_TUNNEL6: ++ case ARPHRD_SIT: ++ case ARPHRD_IPGRE: ++ case ARPHRD_VOID: ++ case ARPHRD_NONE: ++ case ARPHRD_RAWIP: ++ return false; ++ default: ++ return true; ++ } ++} ++ + #endif /* _LINUX_IF_ARP_H */ +--- a/include/linux/if_vlan.h ++++ b/include/linux/if_vlan.h +@@ -66,7 +66,6 @@ static inline struct vlan_ethhdr *vlan_e + #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ + #define VLAN_PRIO_SHIFT 13 + #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ +-#define VLAN_TAG_PRESENT VLAN_CFI_MASK + #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ + #define VLAN_N_VID 4096 + +@@ -78,8 +77,8 @@ static inline bool is_vlan_dev(struct ne + return dev->priv_flags & IFF_802_1Q_VLAN; + } + +-#define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) +-#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) ++#define skb_vlan_tag_present(__skb) ((__skb)->vlan_present) ++#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) + #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) + + /** +@@ -376,6 +375,31 @@ static inline struct sk_buff *vlan_inser + return skb; + } + ++/** ++ * __vlan_hwaccel_clear_tag - clear hardware accelerated VLAN info ++ * @skb: skbuff to clear ++ * ++ * Clears the VLAN information from @skb ++ */ ++static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) ++{ ++ skb->vlan_present = 0; ++} ++ ++/** ++ * __vlan_hwaccel_copy_tag - copy hardware accelerated VLAN info from another skb ++ * @dst: skbuff to copy to ++ * @src: skbuff to copy from ++ * ++ * Copies VLAN information from @src to @dst (for branchless code) ++ */ ++static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src) ++{ ++ dst->vlan_present = src->vlan_present; ++ 
dst->vlan_proto = src->vlan_proto; ++ dst->vlan_tci = src->vlan_tci; ++} ++ + /* + * __vlan_hwaccel_push_inside - pushes vlan tag to the payload + * @skb: skbuff to tag +@@ -390,7 +414,7 @@ static inline struct sk_buff *__vlan_hwa + skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, + skb_vlan_tag_get(skb)); + if (likely(skb)) +- skb->vlan_tci = 0; ++ __vlan_hwaccel_clear_tag(skb); + return skb; + } + /* +@@ -422,7 +446,8 @@ static inline void __vlan_hwaccel_put_ta + __be16 vlan_proto, u16 vlan_tci) + { + skb->vlan_proto = vlan_proto; +- skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci; ++ skb->vlan_tci = vlan_tci; ++ skb->vlan_present = 1; + } + + /** +--- a/include/net/checksum.h ++++ b/include/net/checksum.h +@@ -120,6 +120,11 @@ static inline __wsum csum_partial_ext(co + + #define CSUM_MANGLED_0 ((__force __sum16)0xffff) + ++static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) ++{ ++ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); ++} ++ + static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) + { + __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); +--- a/lib/test_bpf.c ++++ b/lib/test_bpf.c +@@ -38,6 +38,7 @@ + #define SKB_HASH 0x1234aaab + #define SKB_QUEUE_MAP 123 + #define SKB_VLAN_TCI 0xffff ++#define SKB_VLAN_PRESENT 1 + #define SKB_DEV_IFINDEX 577 + #define SKB_DEV_TYPE 588 + +@@ -691,8 +692,8 @@ static struct bpf_test tests[] = { + CLASSIC, + { }, + { +- { 1, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT }, +- { 10, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT } ++ { 1, SKB_VLAN_TCI }, ++ { 10, SKB_VLAN_TCI } + }, + }, + { +@@ -705,8 +706,8 @@ static struct bpf_test tests[] = { + CLASSIC, + { }, + { +- { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, +- { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } ++ { 1, SKB_VLAN_PRESENT }, ++ { 10, SKB_VLAN_PRESENT } + }, + }, + { +@@ -4432,8 +4433,8 @@ static struct bpf_test tests[] = { + CLASSIC, + { }, + { +- { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, +- { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } ++ { 1, SKB_VLAN_PRESENT }, ++ { 10, SKB_VLAN_PRESENT } + }, + .fill_helper = bpf_fill_maxinsns6, + }, +@@ -5144,6 +5145,7 @@ static struct sk_buff *populate_skb(char + skb->hash = SKB_HASH; + skb->queue_mapping = SKB_QUEUE_MAP; + skb->vlan_tci = SKB_VLAN_TCI; ++ skb->vlan_present = SKB_VLAN_PRESENT; + skb->dev = &dev; + skb->dev->ifindex = SKB_DEV_IFINDEX; + skb->dev->type = SKB_DEV_TYPE; +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3171,6 +3171,21 @@ int __dev_forward_skb(struct net_device + int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); + bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); + ++static __always_inline int ____dev_forward_skb(struct net_device *dev, ++ struct sk_buff *skb) ++{ ++ if (skb_orphan_frags(skb, GFP_ATOMIC) || ++ unlikely(!is_skb_forwardable(dev, skb))) { ++ atomic_long_inc(&dev->rx_dropped); ++ kfree_skb(skb); ++ return NET_RX_DROP; ++ } ++ ++ skb_scrub_packet(skb, true); ++ skb->priority = 0; ++ return 0; ++} ++ + extern int netdev_budget; + + /* Called by rtnetlink.c:rtnl_unlock() */ +--- a/net/openvswitch/actions.c ++++ b/net/openvswitch/actions.c +@@ -246,7 +246,7 @@ static int push_vlan(struct sk_buff *skb + else + key->eth.tci = vlan->vlan_tci; + return skb_vlan_push(skb, vlan->vlan_tpid, +- ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); ++ ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK); + } + + /* 'src' is already properly masked. 
*/ +--- a/net/openvswitch/flow.c ++++ b/net/openvswitch/flow.c +@@ -318,7 +318,7 @@ static int parse_vlan(struct sk_buff *sk + return -ENOMEM; + + qp = (struct qtag_prefix *) skb->data; +- key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); ++ key->eth.tci = qp->tci | htons(VLAN_CFI_MASK); + __skb_pull(skb, sizeof(struct qtag_prefix)); + + return 0; +--- a/net/openvswitch/flow.h ++++ b/net/openvswitch/flow.h +@@ -69,7 +69,7 @@ struct sw_flow_key { + struct { + u8 src[ETH_ALEN]; /* Ethernet source address. */ + u8 dst[ETH_ALEN]; /* Ethernet destination address. */ +- __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ ++ __be16 tci; /* 0 if no VLAN, VLAN_CFI_MASK set otherwise. */ + __be16 type; /* Ethernet frame type. */ + } eth; + union { +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -925,11 +925,11 @@ static int ovs_key_from_nlattrs(struct n + __be16 tci; + + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); +- if (!(tci & htons(VLAN_TAG_PRESENT))) { ++ if (!(tci & htons(VLAN_CFI_MASK))) { + if (is_mask) +- OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); ++ OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_CFI_MASK bit."); + else +- OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); ++ OVS_NLERR(log, "VLAN TCI does not have VLAN_CFI_MASK bit set."); + + return -EINVAL; + } +@@ -1209,7 +1209,7 @@ int ovs_nla_get_match(struct net *net, s + key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); + encap_valid = true; + +- if (tci & htons(VLAN_TAG_PRESENT)) { ++ if (tci & htons(VLAN_CFI_MASK)) { + err = parse_flow_nlattrs(encap, a, &key_attrs, log); + if (err) + return err; +@@ -1297,7 +1297,7 @@ int ovs_nla_get_match(struct net *net, s + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + +- if (!(tci & htons(VLAN_TAG_PRESENT))) { ++ if (!(tci & htons(VLAN_CFI_MASK))) { + OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", + ntohs(tci)); + err = -EINVAL; +@@ -2272,7 +2272,7 @@ static int __ovs_nla_copy_actions(struct + vlan = nla_data(a); + if (vlan->vlan_tpid != htons(ETH_P_8021Q)) + return -EINVAL; +- if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) ++ if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK))) + return -EINVAL; + vlan_tci = vlan->vlan_tci; + break; +@@ -2288,7 +2288,7 @@ static int __ovs_nla_copy_actions(struct + /* Prohibit push MPLS other than to a white list + * for packets that have a known tag order. 
+ */ +- if (vlan_tci & htons(VLAN_TAG_PRESENT) || ++ if (vlan_tci & htons(VLAN_CFI_MASK) || + (eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6) && + eth_type != htons(ETH_P_ARP) && +@@ -2300,7 +2300,7 @@ static int __ovs_nla_copy_actions(struct + } + + case OVS_ACTION_ATTR_POP_MPLS: +- if (vlan_tci & htons(VLAN_TAG_PRESENT) || ++ if (vlan_tci & htons(VLAN_CFI_MASK) || + !eth_p_mpls(eth_type)) + return -EINVAL; + +--- a/net/sched/act_bpf.c ++++ b/net/sched/act_bpf.c +@@ -220,7 +220,7 @@ static int tcf_bpf_init_from_efd(struct + + bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); + +- fp = bpf_prog_get(bpf_fd); ++ fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_ACT, false); + if (IS_ERR(fp)) + return PTR_ERR(fp); + +--- a/net/sched/cls_bpf.c ++++ b/net/sched/cls_bpf.c +@@ -267,7 +267,7 @@ static int cls_bpf_prog_from_efd(struct + + bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); + +- fp = bpf_prog_get(bpf_fd); ++ fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, false); + if (IS_ERR(fp)) + return PTR_ERR(fp); + +--- a/net/8021q/vlan_core.c ++++ b/net/8021q/vlan_core.c +@@ -50,7 +50,7 @@ bool vlan_do_receive(struct sk_buff **sk + } + + skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); +- skb->vlan_tci = 0; ++ __vlan_hwaccel_clear_tag(skb); + + rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats); + +--- a/net/ipv4/ip_tunnel_core.c ++++ b/net/ipv4/ip_tunnel_core.c +@@ -128,7 +128,7 @@ int iptunnel_pull_header(struct sk_buff + secpath_reset(skb); + skb_clear_hash_if_not_l4(skb); + skb_dst_drop(skb); +- skb->vlan_tci = 0; ++ __vlan_hwaccel_clear_tag(skb); + skb_set_queue_mapping(skb, 0); + skb->pkt_type = PACKET_HOST; + +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -673,10 +673,8 @@ static int br_nf_push_frag_xmit(struct n + return 0; + } + +- if (data->vlan_tci) { +- skb->vlan_tci = data->vlan_tci; +- skb->vlan_proto = data->vlan_proto; +- } ++ if (data->vlan_proto) ++ __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); + + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); + __skb_push(skb, data->encap_size); +@@ -740,8 +738,13 @@ static int br_nf_dev_queue_xmit(struct n + + data = this_cpu_ptr(&brnf_frag_data_storage); + +- data->vlan_tci = skb->vlan_tci; +- data->vlan_proto = skb->vlan_proto; ++ if (skb_vlan_tag_present(skb)) { ++ data->vlan_tci = skb->vlan_tci; ++ data->vlan_proto = skb->vlan_proto; ++ } else { ++ data->vlan_proto = 0; ++ } ++ + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -751,7 +751,7 @@ static inline int br_vlan_get_tag(const + int err = 0; + + if (skb_vlan_tag_present(skb)) { +- *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK; ++ *vid = skb_vlan_tag_get_id(skb); + } else { + *vid = 0; + err = -EINVAL; +--- a/net/bridge/br_vlan.c ++++ b/net/bridge/br_vlan.c +@@ -354,7 +354,7 @@ struct sk_buff *br_handle_vlan(struct ne + } + } + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) +- skb->vlan_tci = 0; ++ __vlan_hwaccel_clear_tag(skb); + + out: + return skb; +@@ -420,8 +420,8 @@ static bool __allowed_ingress(struct net + __vlan_hwaccel_put_tag(skb, proto, pvid); + else + /* Priority-tagged Frame. +- * At this point, We know that skb->vlan_tci had +- * VLAN_TAG_PRESENT bit and its VID field was 0x000. ++ * At this point, we know that skb->vlan_tci VID ++ * field was 0. + * We update only VID field and preserve PCP field. 
+ */ + skb->vlan_tci |= pvid; +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3988,7 +3988,7 @@ ncls: + * and set skb->priority like in vlan_do_receive() + * For the time being, just ignore Priority Code Point + */ +- skb->vlan_tci = 0; ++ __vlan_hwaccel_clear_tag(skb); + } + + type = skb->protocol; +@@ -4211,7 +4211,9 @@ static void gro_list_prepare(struct napi + } + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; +- diffs |= p->vlan_tci ^ skb->vlan_tci; ++ diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb); ++ if (skb_vlan_tag_present(p)) ++ diffs |= p->vlan_tci ^ skb->vlan_tci; + diffs |= skb_metadata_dst_cmp(p, skb); + if (maclen == ETH_HLEN) + diffs |= compare_ether_header(skb_mac_header(p), +@@ -4452,7 +4454,7 @@ static void napi_reuse_skb(struct napi_s + __skb_pull(skb, skb_headlen(skb)); + /* restore the reserve we had after netdev_alloc_skb_ip_align() */ + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); +- skb->vlan_tci = 0; ++ __vlan_hwaccel_clear_tag(skb); + skb->dev = napi->dev; + skb->skb_iif = 0; + skb->encapsulation = 0; +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -4491,7 +4491,7 @@ int skb_vlan_pop(struct sk_buff *skb) + int err; + + if (likely(skb_vlan_tag_present(skb))) { +- skb->vlan_tci = 0; ++ __vlan_hwaccel_clear_tag(skb); + } else { + if (unlikely((skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD)) || +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1484,6 +1484,14 @@ do { \ + lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ + } while (0) + ++#ifdef CONFIG_LOCKDEP ++static inline bool lockdep_sock_is_held(struct sock *sk) ++{ ++ return lockdep_is_held(&sk->sk_lock) || ++ lockdep_is_held(&sk->sk_lock.slock); ++} ++#endif ++ + void lock_sock_nested(struct sock *sk, int subclass); + + static inline void lock_sock(struct sock *sk) diff --git a/toolchain/kernel-headers/Makefile b/toolchain/kernel-headers/Makefile index c33f26d46d..06236b5a47 100644 --- a/toolchain/kernel-headers/Makefile +++ b/toolchain/kernel-headers/Makefile @@ -12,6 +12,14 @@ BUILD_DIR := $(KERNEL_BUILD_DIR) override QUILT:= override HOST_QUILT:= +include $(INCLUDE_DIR)/target.mk + +ifeq ($(KERNEL_PATCHVER),4.4) + KERNEL_PATCHVER := 5.10 + KERNEL_NAME_SUFFIX:= +endif + +include $(INCLUDE_DIR)/kernel-version.mk include $(INCLUDE_DIR)/kernel.mk PKG_NAME:=linux -- 2.25.1
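
(Illustrative sketch, not part of the patch: how driver code would handle offloaded VLAN tags once the vlan_present bit from this backport is in place. Presence is tested and cleared through the accessors instead of masking VLAN_TAG_PRESENT out of skb->vlan_tci; the demo_* function names are hypothetical and a kernel build context is assumed.)

#include <linux/if_vlan.h>
#include <linux/skbuff.h>

/* Record an offloaded VLAN tag on receive: stores the protocol and the
 * full 16-bit TCI and sets skb->vlan_present. */
static void demo_rx_record_vlan(struct sk_buff *skb, u16 tci)
{
	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tci);
}

/* Read the tag back on transmit; with this backport skb_vlan_tag_get()
 * returns the TCI unmasked, since no presence bit is folded into it. */
static u16 demo_tx_read_vlan(struct sk_buff *skb)
{
	u16 tci = 0;

	if (skb_vlan_tag_present(skb)) {
		tci = skb_vlan_tag_get(skb);
		__vlan_hwaccel_clear_tag(skb);
	}
	return tci;
}
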