Reputation: 135
I am trying to create a simple BPF filter for tc
to duplicate all packets arriving on a specific interface to two (or more) loopback addresses. The following code works with one address, but if I add a second bpf_clone_redirect
then the packets are only received on the second address and not on the first. tcpdump -nevvi lo
shows all packets destined to both.
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include "bpf_helpers.h"
#include <arpa/inet.h>
// Clone destinations in host byte order; the classifier converts them with htonl().
#define CLONE_1 0x7F000101 // 127.0.1.1
#define CLONE_2 0x7F000102 // 127.0.1.2
// Interface index handed to bpf_clone_redirect() as the redirect target.
#define LOOPBACK_INTERFACE_INDEX 1
// IPv4 source/destination address pair. The two members are adjacent, so the
// struct can be copied to or from the packet at offsetof(struct iphdr, saddr)
// in one 8-byte transfer.
struct l3_fields
{
__u32 saddr; // source address, as stored in the packet
__u32 daddr; // destination address, as stored in the packet
};
// Pair of 16-bit port values, 4 bytes in total.
// NOTE(review): presumably mirrors the leading source/destination port fields
// of the transport header - confirm against the code that fills it.
struct l4_fields
{
__u16 sport; // source port
__u16 dport; // destination port
};
// Locally declared UDP header layout. The classifier uses
// offsetof(struct udphdr, check) to locate the checksum field in the packet.
// NOTE(review): assumed to match the kernel's struct udphdr - confirm.
struct udphdr
{
__be16 source; // source port (big-endian, per the __be16 type)
__be16 dest; // destination port (big-endian)
__be16 len; // length field (big-endian)
__sum16 check; // checksum field patched after the address rewrite
};
SEC("tc_eth0")
int dns_redirect_eth0_loopback(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
struct ethhdr *eth = data;
struct iphdr *ip4h;
struct l3_fields l3_original_fields;
struct l3_fields l3_new_fields;
struct l4_fields l4_original_fields;
struct l4_fields l4_new_fields;
// redirect packet to loopback interface
__u32 ifindex = LOOPBACK_INTERFACE_INDEX;
// Checking if eth headers are incomplete
if (data + sizeof(*eth) > data_end)
{
return TC_ACT_SHOT;
}
// Allowing IPV6 packets to passthrough without modification
if (ntohs(eth->h_proto) != ETH_P_IP)
{
return TC_ACT_OK;
}
// Checking if IP headers are incomplete
if (data + sizeof(*eth) + sizeof(*ip4h) > data_end)
{
return TC_ACT_SHOT;
}
ip4h = data + sizeof(*eth);
bpf_skb_load_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_original_fields, sizeof(l3_original_fields));
bpf_skb_load_bytes(skb, sizeof(*eth) + sizeof(*ip4h), &l4_original_fields, sizeof(l4_original_fields));
bpf_skb_load_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_new_fields, sizeof(l3_new_fields));
bpf_skb_load_bytes(skb, sizeof(*eth) + sizeof(*ip4h), &l4_new_fields, sizeof(l4_new_fields));
// Change destination address to LOOPBACK
l3_new_fields.daddr = htonl(CLONE_1);
bpf_skb_store_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_new_fields, sizeof(l3_new_fields), BPF_F_RECOMPUTE_CSUM);
__u64 l3sum = bpf_csum_diff((__u32 *)&l3_original_fields, sizeof(l3_original_fields), (__u32 *)&l3_new_fields, sizeof(l3_new_fields), 0);
__u64 l4sum = bpf_csum_diff((__u32 *)&l4_original_fields, sizeof(l4_original_fields), (__u32 *)&l4_new_fields, sizeof(l4_new_fields), l3sum);
int csumret = bpf_l4_csum_replace(skb, sizeof(*eth) + sizeof(*ip4h) + offsetof(struct udphdr, check), 0, l4sum, BPF_F_PSEUDO_HDR);
csumret |= bpf_l3_csum_replace(skb, sizeof(*eth) + offsetof(struct iphdr, check), 0, l3sum, 0);
if (csumret)
{
return TC_ACT_SHOT;
}
int ret = bpf_clone_redirect(skb, LOOPBACK_INTERFACE_INDEX, 1);
bpf_skb_load_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_original_fields, sizeof(l3_original_fields));
bpf_skb_load_bytes(skb, sizeof(*eth) + sizeof(*ip4h), &l4_original_fields, sizeof(l4_original_fields));
// Change destination address to LOOPBACK
l3_new_fields.daddr = htonl(CLONE_2);
bpf_skb_store_bytes(skb, sizeof(*eth) + offsetof(struct iphdr, saddr), &l3_new_fields, sizeof(l3_new_fields), BPF_F_RECOMPUTE_CSUM);
l3sum = bpf_csum_diff((__u32 *)&l3_original_fields, sizeof(l3_original_fields), (__u32 *)&l3_new_fields, sizeof(l3_new_fields), 0);
l4sum = bpf_csum_diff((__u32 *)&l4_original_fields, sizeof(l4_original_fields), (__u32 *)&l4_new_fields, sizeof(l4_new_fields), l3sum);
csumret = bpf_l4_csum_replace(skb, sizeof(*eth) + sizeof(*ip4h) + offsetof(struct udphdr, check), 0, l4sum, BPF_F_PSEUDO_HDR);
csumret |= bpf_l3_csum_replace(skb, sizeof(*eth) + offsetof(struct iphdr, check), 0, l3sum, 0);
if (csumret)
{
return TC_ACT_SHOT;
}
ret &= bpf_clone_redirect(skb, LOOPBACK_INTERFACE_INDEX, 1);
return ret;
}
P.S. the code is heavily based on other things I found online, but the principle should work.
Edit: A trace I got with pwru
:
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->192.168.3.100:26090(udp) inet_gro_receive
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->192.168.3.100:26090(udp) udp4_gro_receive
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->192.168.3.100:26090(udp) udp_gro_receive
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->192.168.3.100:26090(udp) gro_normal_one
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->192.168.3.100:26090(udp) tcf_classify_ingress
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->192.168.3.100:26090(udp) skb_ensure_writable
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) skb_ensure_writable
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) inet_proto_csum_replace_by_diff
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) skb_ensure_writable
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) skb_clone
0xffff967443545c00 0 <empty>:0 0 0 0 0x0000 0 0 :0->:0() __copy_skb_header
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) skb_ensure_writable
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) pskb_expand_head
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) skb_release_data
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) skb_headers_offset_update
0xffff967443545c00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) __bpf_redirect
0xffff967443545c00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) dev_forward_skb
0xffff967443545c00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) __dev_forward_skb
0xffff967443545c00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) skb_scrub_packet
0xffff967443545c00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) eth_type_trans
0xffff967443545c00 0 <empty>:0 4026531992 0 lo:1 0x0800 65536 868 192.168.3.101:26090->127.0.1.1:26090(udp) netif_rx_internal
0xffff967443545c00 0 <empty>:0 4026531992 0 lo:1 0x0800 65536 868 192.168.3.101:26090->127.0.1.1:26090(udp) enqueue_to_backlog
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.1:26090(udp) skb_ensure_writable
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) skb_ensure_writable
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) inet_proto_csum_replace_by_diff
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) skb_ensure_writable
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) skb_clone
0xffff967443545400 0 <empty>:0 0 0 0 0x0000 0 0 :0->:0() __copy_skb_header
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) skb_ensure_writable
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) pskb_expand_head
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) skb_release_data
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) skb_headers_offset_update
0xffff967443545400 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) __bpf_redirect
0xffff967443545400 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) dev_forward_skb
0xffff967443545400 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) __dev_forward_skb
0xffff967443545400 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) skb_scrub_packet
0xffff967443545400 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 882 192.168.3.101:26090->127.0.1.2:26090(udp) eth_type_trans
0xffff967443545400 0 <empty>:0 4026531992 0 lo:1 0x0800 65536 868 192.168.3.101:26090->127.0.1.2:26090(udp) netif_rx_internal
0xffff967443545400 0 <empty>:0 4026531992 0 lo:1 0x0800 65536 868 192.168.3.101:26090->127.0.1.2:26090(udp) enqueue_to_backlog
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->127.0.1.2:26090(udp) ip_rcv_core
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->127.0.1.2:26090(udp) udp_v4_early_demux
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->127.0.1.2:26090(udp) ip_route_input_noref
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->127.0.1.2:26090(udp) ip_route_input_rcu
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->127.0.1.2:26090(udp) ip_route_input_slow
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 868 192.168.3.101:26090->127.0.1.2:26090(udp) fib_validate_source
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 868 192.168.3.101:26090->127.0.1.2:26090(udp) ip_local_deliver
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 868 192.168.3.101:26090->127.0.1.2:26090(udp) nf_hook_slow
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 868 192.168.3.101:26090->127.0.1.2:26090(udp) ip_local_deliver_finish
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) ip_protocol_deliver_rcu
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) raw_local_deliver
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) udp_rcv
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) __udp4_lib_rcv
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) __icmp_send
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) __ip_options_echo
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) security_skb_classify_flow
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) __xfrm_decode_session
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) decode_session4
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) security_xfrm_decode_session
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) kfree_skb
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 65536 848 192.168.3.101:26090->127.0.1.2:26090(udp) skb_release_head_state
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 848 192.168.3.101:26090->127.0.1.2:26090(udp) skb_release_data
0xffff967443545a00 0 <empty>:0 4026531992 0 enp1s0:2 0x0800 9000 848 192.168.3.101:26090->127.0.1.2:26090(udp) kfree_skbmem
Clearly the second packet goes through a lot more than the first, though I don't know enough about it to diagnose.
Upvotes: 0
Views: 97
Reputation: 131
You can't do multiple redirects for a packet from one program. This applies to XDP as well. This limitation might be lifted in the future, but no one has addressed it yet.
The related issue and a possible solution for XDP (not for the TC hook, but in principle it might apply there as well): https://www.spinics.net/lists/xdp-newbies/msg02552.html
A possible workaround could be to pass the packet to userspace with bpf_ringbuf, modify it there, and inject it back into the network via AF_PACKET. This might introduce 50-150 microseconds of extra latency.
Edit: The above applies to XDP only. As @pchaigno pointed out, this is doable with TC. You have to create a map with the interfaces you want to redirect the packet to. Then use bpf_for_each_map_elem
to iterate the map with a callback function which does the actual redirect bpf_clone_redirect
.
Upvotes: -1