两台云主机处于同一个物理主机的fip和安全组会话冲突

问题描述:

在开启安全组情况下,同物理机器上同一个br-int下两个绑定fip的云主机icmp通信时通时不通,TCP协议通信则始终无法连通

模拟的脚本

# Create namespace ns1 simulating VM A:
#   fixed IP 10.100.5.8 / MAC fa:16:3e:1d:3d:01
#   (bound FIP 192.168.56.31, FIP MAC fa:16:3e:00:eb:c0)
# Also creates the shared test bridge test0 and plugs the veth in as port 1.
ns=ns1
veth=vns1
peer=vpeerns1
sudo ip netns add "$ns"
sudo ovs-vsctl add-br test0
sudo ip link add "$veth" type veth peer name "$peer"
sudo ip link set "$peer" netns "$ns"
sudo ip link set "$veth" up
sudo ip netns exec "$ns" ip link set "$peer" address fa:16:3e:1d:3d:01
sudo ip netns exec "$ns" ip addr add dev "$peer" 10.100.5.8/24
sudo ip netns exec "$ns" ip link set "$peer" up
sudo ip netns exec "$ns" ip route add default via 10.100.5.1
sudo ovs-vsctl add-port test0 "$veth"

# Create namespace ns2 simulating VM B:
#   fixed IP 10.100.5.9 / MAC fa:16:3e:13:85:be
#   (bound FIP 192.168.56.32, FIP MAC fa:16:3e:71:b6:6e)
# Plugs its veth into the existing bridge test0 as port 2.
ns=ns2
veth=vns2
peer=vpeerns2
sudo ip netns add "$ns"
sudo ip link add "$veth" type veth peer name "$peer"
sudo ip link set "$peer" netns "$ns"
sudo ip link set "$veth" up
sudo ip netns exec "$ns" ip link set "$peer" address fa:16:3e:13:85:be
sudo ip netns exec "$ns" ip addr add dev "$peer" 10.100.5.9/24
sudo ip netns exec "$ns" ip link set "$peer" up
sudo ip netns exec "$ns" ip route add default via 10.100.5.1
sudo ovs-vsctl add-port test0 "$veth"

# Table 0 — dispatcher: classify traffic entering the bridge.
#   IP from either VM port -> egress security group (table 40).
#   ARP                    -> ARP responder (table 81).
dispatcher_flows=(
  "table=0,priority=10,in_port=1,ip,actions=goto_table=40"
  "table=0,priority=10,in_port=2,ip,actions=goto_table=40"
  "table=0,priority=42,arp,actions=goto_table=81"
)
for flow in "${dispatcher_flows[@]}"; do
  sudo ovs-ofctl -v -O OpenFlow13 add-flow test0 "$flow"
done

# Tables 21/25/26/27/28 — L3 routing and floating-IP NAT.
# Virtual router: 10.100.5.1 / fa:16:3e:57:f2:9a.
#   21: route — fixed-IP destinations go to ingress ACL (251),
#       FIP destinations get the FIP MAC and continue to DNAT (25).
#   25: DNAT FIP -> fixed IP, rewrite src MAC to router, dst MAC to VM.
#   26: SNAT fixed IP -> FIP for traffic leaving a VM.
#   27: loop fixed-IP destinations back through routing (21).
#   28: after SNAT, set src MAC to the FIP MAC and re-route (21).
fip_flows=(
  "table=21,priority=42,ip,nw_dst=10.100.5.9 actions=resubmit(,251)"
  "table=21,priority=42,ip,nw_dst=10.100.5.8 actions=resubmit(,251)"
  "table=21,priority=42,ip,nw_dst=192.168.56.31 actions=set_field:fa:16:3e:00:eb:c0->eth_dst,goto_table:25"
  "table=21,priority=42,ip,nw_dst=192.168.56.32 actions=set_field:fa:16:3e:71:b6:6e->eth_dst,goto_table:25"
  "table=25,priority=10,ip,dl_dst=fa:16:3e:00:eb:c0,nw_dst=192.168.56.31 actions=set_field:10.100.5.8->ip_dst,set_field:fa:16:3e:57:f2:9a->eth_src,set_field:fa:16:3e:1d:3d:01->eth_dst,goto_table:26"
  "table=25,priority=10,ip,dl_dst=fa:16:3e:71:b6:6e,nw_dst=192.168.56.32 actions=set_field:10.100.5.9->ip_dst,set_field:fa:16:3e:57:f2:9a->eth_src,set_field:fa:16:3e:13:85:be->eth_dst,goto_table:26"
  "table=26,priority=10,ip,nw_src=10.100.5.8 actions=set_field:192.168.56.31->ip_src,goto_table:28"
  "table=26,priority=10,ip,nw_src=10.100.5.9 actions=set_field:192.168.56.32->ip_src,goto_table:28"
  "table=27,priority=10,ip,nw_dst=10.100.5.8 actions=resubmit(,21)"
  "table=27,priority=10,ip,nw_dst=10.100.5.9 actions=resubmit(,21)"
  "table=28,priority=10,ip,nw_src=192.168.56.31 actions=set_field:fa:16:3e:00:eb:c0->eth_src,resubmit(,21)"
  "table=28,priority=10,ip,nw_src=192.168.56.32 actions=set_field:fa:16:3e:71:b6:6e->eth_src,resubmit(,21)"
)
for flow in "${fip_flows[@]}"; do
  sudo ovs-ofctl -O OpenFlow13 add-flow test0 "$flow"
done

# Tables 40/41 — ACL rules, egress from VM (security group, outbound).
#   40: send packets with the correct (MAC, IP) pair through conntrack
#       (shared zone 5002 — this shared zone is the source of the bug).
#   41: allow established/related, drop invalid, commit new sessions
#       from a known port, drop everything else.
egress_acl_flows=(
  "table=40,priority=61010,ip,dl_src=fa:16:3e:1d:3d:01,nw_src=10.100.5.8,actions=ct(table=41,zone=5002)"
  "table=40,priority=61010,ip,dl_src=fa:16:3e:13:85:be,nw_src=10.100.5.9,actions=ct(table=41,zone=5002)"
  "table=40,priority=0,actions=drop"
  "table=41,priority=62020,ct_state=-new+est-rel-inv+trk,actions=resubmit(,21)"
  "table=41,priority=62020,ct_state=-new-est+rel-inv+trk,actions=resubmit(,21)"
  "table=41,priority=62020,ct_state=+inv+trk,actions=drop"
  "table=41,priority=1000,ct_state=+new+trk,ip,dl_src=fa:16:3e:1d:3d:01,nw_src=10.100.5.8,actions=ct(commit,zone=5002),resubmit(,21)"
  "table=41,priority=1000,ct_state=+new+trk,ip,dl_src=fa:16:3e:13:85:be,nw_src=10.100.5.9,actions=ct(commit,zone=5002),resubmit(,21)"
  "table=41,priority=50,ct_state=+new+trk,actions=drop"
  "table=41,priority=0,actions=drop"
)
for flow in "${egress_acl_flows[@]}"; do
  sudo ovs-ofctl -O OpenFlow13 add-flow test0 "$flow"
done

# Table 81 — ARP responder: answer ARP requests for the two FIPs and the
# router gateway in the pipeline (swap src/dst, set arp_op=2, load the
# answering MAC/IP as hex into SHA/SPA); all other ARP falls back to NORMAL.
arp_flows=(
  "table=81,priority=100,arp,arp_tpa=192.168.56.31,arp_op=1 actions=move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],set_field:fa:16:3e:00:eb:c0->eth_src,load:0x2->NXM_OF_ARP_OP[],move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],move:NXM_OF_ARP_SPA[]->NXM_OF_ARP_TPA[],load:0xfa163e00ebc0->NXM_NX_ARP_SHA[],load:0xc0a8381f->NXM_OF_ARP_SPA[],load:0->NXM_OF_IN_PORT[],load:0x500->NXM_NX_REG6[],write_metadata:0/0x1,goto_table:220"
  "table=81,priority=100,arp,arp_tpa=192.168.56.32,arp_op=1 actions=move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],set_field:fa:16:3e:71:b6:6e->eth_src,load:0x2->NXM_OF_ARP_OP[],move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],move:NXM_OF_ARP_SPA[]->NXM_OF_ARP_TPA[],load:0xfa163e71b66e->NXM_NX_ARP_SHA[],load:0xc0a83820->NXM_OF_ARP_SPA[],load:0->NXM_OF_IN_PORT[],load:0x500->NXM_NX_REG6[],write_metadata:0/0x1,goto_table:220"
  "table=81,arp,arp_tpa=10.100.5.1,arp_op=1 actions=move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],set_field:fa:16:3e:57:f2:9a->eth_src,load:0x2->NXM_OF_ARP_OP[],move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],move:NXM_OF_ARP_SPA[]->NXM_OF_ARP_TPA[],load:0xfa163e7c661a->NXM_NX_ARP_SHA[],load:0xa640501->NXM_OF_ARP_SPA[],load:0->NXM_OF_IN_PORT[],load:0x100->NXM_NX_REG6[],resubmit(,220)"
  "table=81,priority=10,arp,actions=NORMAL"
)
for flow in "${arp_flows[@]}"; do
  sudo ovs-ofctl -O OpenFlow13 add-flow test0 "$flow"
done

# Table 220 — final delivery: output IP/ARP to the VM port matching the
# destination MAC (port 1 = ns1, port 2 = ns2).
output_flows=(
  "table=220,priority=62020,ip,dl_dst=fa:16:3e:1d:3d:01,nw_dst=10.100.5.8,actions=output:1"
  "table=220,priority=62020,ip,dl_dst=fa:16:3e:13:85:be,nw_dst=10.100.5.9,actions=output:2"
  "table=220,priority=62020,arp,dl_dst=fa:16:3e:1d:3d:01,actions=output:1"
  "table=220,priority=62020,arp,dl_dst=fa:16:3e:13:85:be,actions=output:2"
)
for flow in "${output_flows[@]}"; do
  sudo ovs-ofctl -O OpenFlow13 add-flow test0 "$flow"
done

# Tables 251/252 — ACL rules, ingress to VM (security group, inbound).
# Mirrors the egress tables: 251 sends known (MAC, IP) destinations through
# conntrack (same shared zone 5002), 252 applies the allow/drop/commit policy
# and delivers accepted traffic via table 220.
ingress_acl_flows=(
  "table=251,priority=61010,ip,dl_dst=fa:16:3e:1d:3d:01,nw_dst=10.100.5.8,actions=ct(table=252,zone=5002)"
  "table=251,priority=61010,ip,dl_dst=fa:16:3e:13:85:be,nw_dst=10.100.5.9,actions=ct(table=252,zone=5002)"
  "table=251,priority=0,actions=drop"
  "table=252,priority=62020,ct_state=-new+est-rel-inv+trk,actions=resubmit(,220)"
  "table=252,priority=62020,ct_state=-new-est+rel-inv+trk,actions=resubmit(,220)"
  "table=252,priority=62020,ct_state=+inv+trk,actions=drop"
  "table=252,priority=50,ct_state=+new+trk,actions=drop"
  "table=252,priority=1000,ct_state=+new+trk,ip,dl_dst=fa:16:3e:1d:3d:01,nw_dst=10.100.5.8,actions=ct(commit,zone=5002),resubmit(,220)"
  "table=252,priority=1000,ct_state=+new+trk,ip,dl_dst=fa:16:3e:13:85:be,nw_dst=10.100.5.9,actions=ct(commit,zone=5002),resubmit(,220)"
  "table=252,priority=0,actions=drop"
)
for flow in "${ingress_acl_flows[@]}"; do
  sudo ovs-ofctl -O OpenFlow13 add-flow test0 "$flow"
done

分析

通过观察流量路径发现,数据包在table15被丢弃,table15是出方向安全组。详细说明如下

eg: 云主机A fip为FIPA 和云主机B fip为FIPB icmp通信

云主机A icmp request数据包可以到达云主机B

云主机B回复该数据包,经过B的出方向安全组模块时候被丢弃,计数如下流表所示

cookie=0x0, duration=1154.456s, table=15, n_packets=0, n_bytes=0, idle_age=18019, priority=65534,ct_state=+inv+trk actions=drop

云主机B回复的数据包被当成异常状态,该数据包被丢弃

另从linux conntrack 会话角度分析问题如下:

conntrack -L查看会话

icmp 1 29 src=7.7.7.11 dst=192.168.210.17 type=8 code=0 id=31233 [UNREPLIED] src=192.168.210.17 dst=7.7.7.11 type=0 code=0 id=31233 mark=0 zone=7 use=1

只有正向会话,缺少源地址和目的地址转换后的会话,所以,云主机B reply数据包因查不到会话而认为该数据包异常状态,将reply丢弃

那么问题可以归纳为,在同一台物理主机上,两个绑定同网段fip的云主机互相通信,且用流表实现安全组功能时,icmp协议通信时,fip 转换后的会话建立异常(icmp有时候能建立,TCP一直建立不了),导致云主机连通访问失败

解决办法:

将这两个会话放到不同的conntrack zone中问题可解决,具体操作是:

# Fix: give each port its own conntrack zone (5001 for ns1, 5002 for ns2)
# so that the pre-NAT session and the post-NAT session of one flow no longer
# collide inside a single shared zone. These overriding flows use priorities
# 61011/1001 — one above the original shared-zone flows (61010/1000) — so
# they take precedence without requiring the old rules to be deleted.
# NOTE(review): the original text used priority=62011 on the ns2 table=251
# flow; 61011 is used here for symmetry — any value above 61010 works.
sudo ovs-ofctl -O OpenFlow13 add-flow test0 "table=40,priority=61011,ip,dl_src=fa:16:3e:1d:3d:01,nw_src=10.100.5.8,actions=ct(table=41,zone=5001)"
sudo ovs-ofctl -O OpenFlow13 add-flow test0 "table=40,priority=61011,ip,dl_src=fa:16:3e:13:85:be,nw_src=10.100.5.9,actions=ct(table=41,zone=5002)"
sudo ovs-ofctl -O OpenFlow13 add-flow test0 "table=41,priority=1001,ct_state=+new+trk,ip,dl_src=fa:16:3e:1d:3d:01,nw_src=10.100.5.8,actions=ct(commit,zone=5001),resubmit(,21)"
sudo ovs-ofctl -O OpenFlow13 add-flow test0 "table=41,priority=1001,ct_state=+new+trk,ip,dl_src=fa:16:3e:13:85:be,nw_src=10.100.5.9,actions=ct(commit,zone=5002),resubmit(,21)"
sudo ovs-ofctl -O OpenFlow13 add-flow test0 "table=251,priority=61011,ip,dl_dst=fa:16:3e:1d:3d:01,nw_dst=10.100.5.8,actions=ct(table=252,zone=5001)"
sudo ovs-ofctl -O OpenFlow13 add-flow test0 "table=251,priority=61011,ip,dl_dst=fa:16:3e:13:85:be,nw_dst=10.100.5.9,actions=ct(table=252,zone=5002)"
sudo ovs-ofctl -O OpenFlow13 add-flow test0 "table=252,priority=1001,ct_state=+new+trk,ip,dl_dst=fa:16:3e:1d:3d:01,nw_dst=10.100.5.8,actions=ct(commit,zone=5001),resubmit(,220)"
sudo ovs-ofctl -O OpenFlow13 add-flow test0 "table=252,priority=1001,ct_state=+new+trk,ip,dl_dst=fa:16:3e:13:85:be,nw_dst=10.100.5.9,actions=ct(commit,zone=5002),resubmit(,220)"