Francesco Carzaniga
Francesco Carzaniga

Reputation: 135

Multiple `bpf_clone_redirect` send packet only to last address

I am trying to create a simple BPF filter for tc to duplicate all packets arriving on a specific interface to two (or more) loopback addresses. The following code works with one address, but if I add a second bpf_clone_redirect, the packets are only received on the second address and not on the first. tcpdump -nevvi lo shows all packets destined to both.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include "bpf_helpers.h"
#include <arpa/inet.h>

/*
 * Destination addresses for the two clones, as host-order IPv4 values;
 * they are converted with htonl() at the point of use.
 */
#define CLONE_1 0x7F000101 /* 127.0.1.1 */
#define CLONE_2 0x7F000102 /* 127.0.1.2 */

/* Interface index passed to bpf_clone_redirect() as the redirect target. */
#define LOOPBACK_INTERFACE_INDEX 1

/*
 * IPv4 source/destination address pair.
 *
 * Instances are filled from, and written back to, the packet starting at
 * the offset of iphdr.saddr, so the two members must stay in this order
 * with no padding between them.
 */
struct l3_fields {
    __u32 saddr; /* source address, as found in the packet */
    __u32 daddr; /* destination address; rewritten via htonl() by the program */
};

/*
 * The first four bytes that follow the fixed-size IPv4 header, interpreted
 * as a source/destination port pair. Used only as input to the checksum
 * difference computation.
 */
struct l4_fields {
    __u16 sport; /* source port, as found in the packet */
    __u16 dport; /* destination port, as found in the packet */
};

/*
 * Local declaration of the UDP header layout. The program in this file
 * only uses it to obtain the offset of the `check` member.
 */
struct udphdr {
    __be16 source;  /* source port */
    __be16 dest;    /* destination port */
    __be16 len;     /* length field */
    __sum16 check;  /* checksum field patched by bpf_l4_csum_replace() */
};

SEC("tc_eth0")
int dns_redirect_eth0_loopback(struct __sk_buff *skb)
{
    void *data_end = (void *)(long)skb->data_end;
    void *data = (void *)(long)skb->data;
    struct ethhdr *eth = data;
    struct iphdr *ip4h;
    struct l3_fields l3_original_fields;
    struct l3_fields l3_new_fields;
    struct l4_fields l4_original_fields;
    struct l4_fields l4_new_fields;

    // redirect packet to loopback interface
    __u32 ifindex = LOOPBACK_INTERFACE_INDEX;

    // Checking if eth headers are incomplete
    if (data + sizeof(*eth) > data_end)
    {
        return TC_ACT_SHOT;
    }

    // Allowing IPV6 packets to passthrough without modification
    if (ntohs(eth->h_proto) != ETH_P_IP)
    {
        return TC_ACT_OK;
    }

    // Checking if IP headers are incomplete
    if (data + sizeof(*eth) + sizeof(*ip4h) > data_end)
    {
        return TC_ACT_SHOT;
    }

    ip4h = data + sizeof(*eth);
    bpf_skb_load_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_original_fields, sizeof(l3_original_fields));
    bpf_skb_load_bytes(skb, sizeof(*eth) + sizeof(*ip4h), &l4_original_fields, sizeof(l4_original_fields));
    bpf_skb_load_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_new_fields, sizeof(l3_new_fields));
    bpf_skb_load_bytes(skb, sizeof(*eth) + sizeof(*ip4h), &l4_new_fields, sizeof(l4_new_fields));

    // Change destination address to LOOPBACK
    l3_new_fields.daddr = htonl(CLONE_1);

    bpf_skb_store_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_new_fields, sizeof(l3_new_fields), BPF_F_RECOMPUTE_CSUM);

    __u64 l3sum = bpf_csum_diff((__u32 *)&l3_original_fields, sizeof(l3_original_fields), (__u32 *)&l3_new_fields, sizeof(l3_new_fields), 0);
    __u64 l4sum = bpf_csum_diff((__u32 *)&l4_original_fields, sizeof(l4_original_fields), (__u32 *)&l4_new_fields, sizeof(l4_new_fields), l3sum);

    int csumret = bpf_l4_csum_replace(skb, sizeof(*eth) + sizeof(*ip4h) + offsetof(struct udphdr, check), 0, l4sum, BPF_F_PSEUDO_HDR);
    csumret |= bpf_l3_csum_replace(skb, sizeof(*eth) + offsetof(struct iphdr, check), 0, l3sum, 0);

    if (csumret)
    {
        return TC_ACT_SHOT;
    }

    int ret = bpf_clone_redirect(skb, LOOPBACK_INTERFACE_INDEX, 1);

    bpf_skb_load_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_original_fields, sizeof(l3_original_fields));
    bpf_skb_load_bytes(skb, sizeof(*eth) + sizeof(*ip4h), &l4_original_fields, sizeof(l4_original_fields));

    // Change destination address to LOOPBACK
    l3_new_fields.daddr = htonl(CLONE_2);

    bpf_skb_store_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_new_fields, sizeof(l3_new_fields), BPF_F_RECOMPUTE_CSUM);

    l3sum = bpf_csum_diff((__u32 *)&l3_original_fields, sizeof(l3_original_fields), (__u32 *)&l3_new_fields, sizeof(l3_new_fields), 0);
    l4sum = bpf_csum_diff((__u32 *)&l4_original_fields, sizeof(l4_original_fields), (__u32 *)&l4_new_fields, sizeof(l4_new_fields), l3sum);

    csumret = bpf_l4_csum_replace(skb, sizeof(*eth) + sizeof(*ip4h) + offsetof(struct udphdr, check), 0, l4sum, BPF_F_PSEUDO_HDR);
    csumret |= bpf_l3_csum_replace(skb, sizeof(*eth) + offsetof(struct iphdr, check), 0, l3sum, 0);

    if (csumret)
    {
        return TC_ACT_SHOT;
    }

    ret &= bpf_clone_redirect(skb, LOOPBACK_INTERFACE_INDEX, 1);

    return ret;
}

P.S. the code is heavily based on other things I found online, but the principle should work.

Edit: A trace I got with pwru:

0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->192.168.3.100:26090(udp) inet_gro_receive
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->192.168.3.100:26090(udp) udp4_gro_receive
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->192.168.3.100:26090(udp) udp_gro_receive
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->192.168.3.100:26090(udp) gro_normal_one
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->192.168.3.100:26090(udp) tcf_classify_ingress
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->192.168.3.100:26090(udp) skb_ensure_writable
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     skb_ensure_writable
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     inet_proto_csum_replace_by_diff
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     skb_ensure_writable
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     skb_clone
0xffff967443545c00 0   <empty>:0        0          0               0         0x0000 0     0     :0->:0()                                      __copy_skb_header
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     skb_ensure_writable
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     pskb_expand_head
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     skb_release_data
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     skb_headers_offset_update
0xffff967443545c00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     __bpf_redirect
0xffff967443545c00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     dev_forward_skb
0xffff967443545c00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     __dev_forward_skb
0xffff967443545c00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     skb_scrub_packet
0xffff967443545c00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     eth_type_trans
0xffff967443545c00 0   <empty>:0        4026531992 0              lo:1       0x0800 65536 868   192.168.3.101:26090->127.0.1.1:26090(udp)     netif_rx_internal
0xffff967443545c00 0   <empty>:0        4026531992 0              lo:1       0x0800 65536 868   192.168.3.101:26090->127.0.1.1:26090(udp)     enqueue_to_backlog
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.1:26090(udp)     skb_ensure_writable
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     skb_ensure_writable
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     inet_proto_csum_replace_by_diff
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     skb_ensure_writable
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     skb_clone
0xffff967443545400 0   <empty>:0        0          0               0         0x0000 0     0     :0->:0()                                      __copy_skb_header
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     skb_ensure_writable
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     pskb_expand_head
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     skb_release_data
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     skb_headers_offset_update
0xffff967443545400 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     __bpf_redirect
0xffff967443545400 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     dev_forward_skb
0xffff967443545400 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     __dev_forward_skb
0xffff967443545400 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     skb_scrub_packet
0xffff967443545400 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  882   192.168.3.101:26090->127.0.1.2:26090(udp)     eth_type_trans
0xffff967443545400 0   <empty>:0        4026531992 0              lo:1       0x0800 65536 868   192.168.3.101:26090->127.0.1.2:26090(udp)     netif_rx_internal
0xffff967443545400 0   <empty>:0        4026531992 0              lo:1       0x0800 65536 868   192.168.3.101:26090->127.0.1.2:26090(udp)     enqueue_to_backlog
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->127.0.1.2:26090(udp)     ip_rcv_core
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->127.0.1.2:26090(udp)     udp_v4_early_demux
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->127.0.1.2:26090(udp)     ip_route_input_noref
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->127.0.1.2:26090(udp)     ip_route_input_rcu
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->127.0.1.2:26090(udp)     ip_route_input_slow
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  868   192.168.3.101:26090->127.0.1.2:26090(udp)     fib_validate_source
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 868   192.168.3.101:26090->127.0.1.2:26090(udp)     ip_local_deliver
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 868   192.168.3.101:26090->127.0.1.2:26090(udp)     nf_hook_slow
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 868   192.168.3.101:26090->127.0.1.2:26090(udp)     ip_local_deliver_finish
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     ip_protocol_deliver_rcu
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     raw_local_deliver
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     udp_rcv
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     __udp4_lib_rcv
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     __icmp_send
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     __ip_options_echo
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     security_skb_classify_flow
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     __xfrm_decode_session
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     decode_session4
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     security_xfrm_decode_session
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     kfree_skb
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 65536 848   192.168.3.101:26090->127.0.1.2:26090(udp)     skb_release_head_state
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  848   192.168.3.101:26090->127.0.1.2:26090(udp)     skb_release_data
0xffff967443545a00 0   <empty>:0        4026531992 0            enp1s0:2     0x0800 9000  848   192.168.3.101:26090->127.0.1.2:26090(udp)     kfree_skbmem

Clearly the second packet goes through a lot more than the first, though I don't know enough about it to diagnose.

Upvotes: 0

Views: 97

Answers (1)

SPYFF
SPYFF

Reputation: 131

You can't do multiple redirects for a packet from one program. This also applies to XDP. This limitation might be lifted in the future, but no one has addressed it yet.

The related issue and a possible solution for XDP (not for the TC hook, but in principle it might apply to that as well): https://www.spinics.net/lists/xdp-newbies/msg02552.html

A possible workaround could be passing the packet into userspace with bpf_ringbuf, modifying it, and injecting it back into the network via AF_PACKET. This might introduce 50-150 microseconds of extra latency.

Edit: The above applies to XDP only. As @pchaigno pointed out, this is doable with TC. You have to create a map with the interfaces to which you want to redirect the packet. Then use bpf_for_each_map_elem to iterate over the map with a callback function that does the actual redirect with bpf_clone_redirect.

Upvotes: -1

Related Questions