mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	selftests: mlxsw: Add a test for UC behavior under MC flood
A so-called "MC-aware" mode has recently been enabled in mlxsw. In MC-aware mode, BUM traffic is handled in a special way so that when a switch is flooded with BUM, UC performance isn't unduly impacted. Without enablement of this mode, a stream of BUM traffic can cause sustained UC throughput drop in excess of 99 %. Add a test for this behavior. Compare how much UC throughput degrades as a stream of broadcast frames floods the switch. A minimal degradation is tolerated to cover for glitches in traffic injection performance. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									a381ed12ea
								
							
						
					
					
						commit
						b5638d46c9
					
				
					 1 changed files with 347 additions and 0 deletions
				
			
		
							
								
								
									
										347
									
								
								tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										347
									
								
								tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,347 @@
 | 
			
		|||
#!/bin/bash
 | 
			
		||||
# SPDX-License-Identifier: GPL-2.0
 | 
			
		||||
#
 | 
			
		||||
# A test for switch behavior under MC overload. An issue in Spectrum chips
 | 
			
		||||
# causes throughput of UC traffic to drop severely when a switch is under heavy
 | 
			
		||||
# MC load. This issue can be overcome by putting the switch to MC-aware mode.
 | 
			
		||||
# This test verifies that UC performance stays intact even as the switch is
 | 
			
		||||
# under MC flood, and therefore that the MC-aware mode is enabled and correctly
 | 
			
		||||
# configured.
 | 
			
		||||
#
 | 
			
		||||
# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
 | 
			
		||||
# at full speed. That makes it impossible to use iperf3 to simply measure the
 | 
			
		||||
# throughput, because many packets (that reach $h3) don't get to the kernel at
 | 
			
		||||
# all even in UDP mode (the situation is even worse in TCP mode, where one can't
 | 
			
		||||
# hope to see more than a couple Mbps).
 | 
			
		||||
#
 | 
			
		||||
# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
 | 
			
		||||
# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
 | 
			
		||||
# each gets a different priority and we can use per-prio ethtool counters to
 | 
			
		||||
# measure the throughput. In order to avoid prioritizing unicast traffic, prio
 | 
			
		||||
# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
 | 
			
		||||
# thus TC 0).
 | 
			
		||||
#
 | 
			
		||||
# Mausezahn can't actually saturate the links unless it's using large frames.
 | 
			
		||||
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
 | 
			
		||||
# multicast traffic uses 8K frames.
 | 
			
		||||
#
 | 
			
		||||
# +-----------------------+                +----------------------------------+
 | 
			
		||||
# | H1                    |                |                               H2 |
 | 
			
		||||
# |                       |                |  unicast --> + $h2.111           |
 | 
			
		||||
# |                       |                |  traffic     | 192.0.2.129/28    |
 | 
			
		||||
# |          multicast    |                |              | e-qos-map 0:1     |
 | 
			
		||||
# |          traffic      |                |              |                   |
 | 
			
		||||
# | $h1 + <-----          |                |              + $h2               |
 | 
			
		||||
# +-----|-----------------+                +--------------|-------------------+
 | 
			
		||||
#       |                                                 |
 | 
			
		||||
# +-----|-------------------------------------------------|-------------------+
 | 
			
		||||
# |     + $swp1                                           + $swp2             |
 | 
			
		||||
# |     | >1Gbps                                          | >1Gbps            |
 | 
			
		||||
# | +---|----------------+                     +----------|----------------+  |
 | 
			
		||||
# | |   + $swp1.1        |                     |          + $swp2.111      |  |
 | 
			
		||||
# | |                BR1 |             SW      | BR111                     |  |
 | 
			
		||||
# | |   + $swp3.1        |                     |          + $swp3.111      |  |
 | 
			
		||||
# | +---|----------------+                     +----------|----------------+  |
 | 
			
		||||
# |     \_________________________________________________/                   |
 | 
			
		||||
# |                                    |                                      |
 | 
			
		||||
# |                                    + $swp3                                |
 | 
			
		||||
# |                                    | 1Gbps bottleneck                     |
 | 
			
		||||
# |                                    | prio qdisc: {0..7} -> 7              |
 | 
			
		||||
# +------------------------------------|--------------------------------------+
 | 
			
		||||
#                                      |
 | 
			
		||||
#                                   +--|-----------------+
 | 
			
		||||
#                                   |  + $h3          H3 |
 | 
			
		||||
#                                   |  |                 |
 | 
			
		||||
#                                   |  + $h3.111         |
 | 
			
		||||
#                                   |    192.0.2.130/28  |
 | 
			
		||||
#                                   +--------------------+
 | 
			
		||||
 | 
			
		||||
# Names of the test functions defined in this file that make up the test run
# (presumably consumed by tests_run from lib.sh).
ALL_TESTS="
	ping_ipv4
	test_mc_aware
"

# Quote the script path so that the test can also be started from a directory
# whose name contains whitespace or glob characters.
lib_dir=$(dirname "$0")/../../../net/forwarding

# setup_prepare below reads six interfaces, NETIFS[p1] through NETIFS[p6].
NUM_NETIFS=6
source "$lib_dir/lib.sh"
 | 
			
		||||
 | 
			
		||||
# Set up host interface $h1 and raise its MTU to 10000 so that the 8K frames
# used by the test fit.
h1_create()
{
	simple_if_init "$h1"
	mtu_set "$h1" 10000
}
 | 
			
		||||
 | 
			
		||||
# Undo h1_create: restore the MTU of $h1 first, then tear the interface down.
h1_destroy()
{
	mtu_restore "$h1"
	simple_if_fini "$h1"
}
 | 
			
		||||
 | 
			
		||||
# Set up host interface $h2 (the unicast sender) with a 10000-byte MTU and a
# VLAN 111 upper carrying 192.0.2.129/28.
h2_create()
{
	simple_if_init "$h2"
	mtu_set "$h2" 10000

	vlan_create "$h2" 111 "v$h2" 192.0.2.129/28
	# Map priority 0 to PCP 1 on egress, so that unicast traffic is tagged
	# with a priority distinct from the untagged flood traffic.
	ip link set dev "$h2.111" type vlan egress-qos-map 0:1
}
 | 
			
		||||
 | 
			
		||||
# Undo h2_create in reverse order: VLAN 111 upper, MTU, then the interface.
h2_destroy()
{
	vlan_destroy "$h2" 111

	mtu_restore "$h2"
	simple_if_fini "$h2"
}
 | 
			
		||||
 | 
			
		||||
# Set up host interface $h3 (the receiver) with a 10000-byte MTU and a
# VLAN 111 upper carrying 192.0.2.130/28.
h3_create()
{
	simple_if_init "$h3"
	mtu_set "$h3" 10000

	vlan_create "$h3" 111 "v$h3" 192.0.2.130/28
}
 | 
			
		||||
 | 
			
		||||
# Undo h3_create in reverse order: VLAN 111 upper, MTU, then the interface.
h3_destroy()
{
	vlan_destroy "$h3" 111

	mtu_restore "$h3"
	simple_if_fini "$h3"
}
 | 
			
		||||
 | 
			
		||||
# Configure the switch side of the topology: three ports with jumbo MTU, a
# 1Gbps bottleneck with a flattening prio qdisc on $swp3, and two bridges
# (br1 for untagged traffic, br111 for VLAN 111).
switch_create()
{
	local swp

	# Bring up every switch port and make room for the 8K test frames.
	for swp in "$swp1" "$swp2" "$swp3"; do
		ip link set dev "$swp" up
		mtu_set "$swp" 10000
	done

	vlan_create "$swp2" 111
	vlan_create "$swp3" 111

	# Pin $swp3 to 1Gbps and map all priorities to the same band, so that
	# unicast traffic is not prioritized over the flood.
	ethtool -s "$swp3" speed 1000 autoneg off
	tc qdisc replace dev "$swp3" root handle 3: \
	   prio bands 8 priomap 7 7 7 7 7 7 7 7

	# Untagged path: $swp1 <-> $swp3.
	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev br1 up
	ip link set dev "$swp1" master br1
	ip link set dev "$swp3" master br1

	# VLAN 111 path: $swp2.111 <-> $swp3.111.
	ip link add name br111 type bridge vlan_filtering 0
	ip link set dev br111 up
	ip link set dev "$swp2.111" master br111
	ip link set dev "$swp3.111" master br111
}
 | 
			
		||||
 | 
			
		||||
# Undo switch_create: remove the bridges, the qdisc and speed override on
# $swp3, the VLAN uppers, and finally restore MTU and bring the ports down.
switch_destroy()
{
	local swp

	ip link del dev br111
	ip link del dev br1

	tc qdisc del dev "$swp3" root handle 3:
	ethtool -s "$swp3" autoneg on

	vlan_destroy "$swp3" 111
	vlan_destroy "$swp2" 111

	# Ports are shut down in the reverse of the order they were brought up.
	for swp in "$swp3" "$swp2" "$swp1"; do
		mtu_restore "$swp"
		ip link set dev "$swp" down
	done
}
 | 
			
		||||
 | 
			
		||||
# Resolve the interface names from NETIFS, record the MAC address of $h3 and
# build the whole topology (hosts first, then the switch).
setup_prepare()
{
	# Each host interface is paired with one switch port.
	h1="${NETIFS[p1]}"
	swp1="${NETIFS[p2]}"

	swp2="${NETIFS[p3]}"
	h2="${NETIFS[p4]}"

	swp3="${NETIFS[p5]}"
	h3="${NETIFS[p6]}"

	# Used as the destination MAC of the unicast stream.
	h3mac=$(mac_get "$h3")

	vrf_prepare

	h1_create
	h2_create
	h3_create
	switch_create
}
 | 
			
		||||
 | 
			
		||||
# Tear the topology down in the reverse of the order setup_prepare built it.
# Installed as the EXIT trap at the bottom of the script.
cleanup()
{
	local teardown

	pre_cleanup

	for teardown in switch_destroy h3_destroy h2_destroy h1_destroy; do
		"$teardown"
	done

	vrf_cleanup
}
 | 
			
		||||
 | 
			
		||||
# Sanity check: ping the $h3.111 address (192.0.2.130) from $h2.
ping_ipv4()
{
	ping_test "$h2" 192.0.2.130
}
 | 
			
		||||
 | 
			
		||||
# Print a rate in human-readable form.
#
# Arguments: $1 - rate in bits per second
# Outputs:   the rate scaled by powers of 1024, followed by the matching unit
#            (bps, Kbps, Mbps or Gbps), e.g. "2.0Gbps". Unscaled values are
#            printed as given; scaled values carry one decimal place.
#
# Uses bc for the comparison and division, as the scaled value is not an
# integer anymore.
humanize()
{
	local speed=$1; shift
	local -a units=(bps Kbps Mbps Gbps)
	local last=$((${#units[@]} - 1))
	local idx=0

	# Never scale past the largest known unit: otherwise the value would be
	# divided once more while still being labelled with that unit.
	while ((idx < last)); do
		if (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
		idx=$((idx + 1))
	done

	echo "$speed${units[idx]}"
}
 | 
			
		||||
 | 
			
		||||
# Convert a pair of octet counter readings into a bit rate.
#
# Arguments: $1 - counter value at the start of the interval
#            $2 - counter value at the end of the interval
#            $3 - length of the interval
# Outputs:   8 * ($2 - $1) / $3, using integer arithmetic
rate()
{
	local start=$1 end=$2 seconds=$3
	local octets=$((end - start))

	echo $((8 * octets / seconds))
}
 | 
			
		||||
 | 
			
		||||
# Check that a measured rate exceeds a required minimum.
#
# Arguments: $1 - measured rate
#            $2 - minimum rate; the measured rate must be strictly greater
#            $3 - description of what was measured, used in the diagnostic
# Outputs:   on failure, a diagnostic line on stderr
# Returns:   0 if $1 > $2, 1 otherwise
check_rate()
{
	local rate=$1; shift
	local min=$1; shift
	local what=$1; shift

	if ((rate > min)); then
		return 0
	fi

	# Report the values this function was actually given, rather than
	# relying on variables that happen to exist in the caller's scope.
	echo "$what $(humanize $rate) < $(humanize $min)" >&2
	return 1
}
 | 
			
		||||
 | 
			
		||||
# Send a unicast stream from $h2.111 to $h3 and measure its throughput.
#
# Arguments: $1 - label of the measurement, used in diagnostics
# Outputs:   "<ingress rate> <egress rate>" on stdout, as computed by rate
#            from the rx_octets_prio_1 counters of $swp2 (ingress) and $h3
#            (egress) over a 10-second interval
# Returns:   0 if one of up to six attempts reached the minimum ingress rate,
#            1 otherwise (the rates of the last attempt are still printed)
#
# Meant to be called in a command substitution, as the caller needs both the
# printed rates and the return status.
measure_uc_rate()
{
	local what=$1; shift

	local interval=10
	local i
	local ret=0
	local t0 t1 u0 u1
	local ir er
	local mz_pid

	# Dips in performance might cause momentary ingress rate to drop below
	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
	# average ingress rate to somewhat mitigate this.
	local min_ingress=2147483648

	mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
		-a own -b $h3mac -t udp -q &
	# Remember exactly which process was started, so that only this one is
	# killed below.
	mz_pid=$!
	sleep 1

	for i in {5..0}; do
		t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
		u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
		sleep $interval
		t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
		u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

		ir=$(rate $u0 $u1 $interval)
		er=$(rate $t0 $t1 $interval)

		if check_rate $ir $min_ingress "$what ingress rate"; then
			break
		fi

		# Fail the test if we can't get the throughput.
		if ((i == 0)); then
			ret=1
		fi
	done

	# Suppress noise from killing mausezahn.
	{ kill "$mz_pid" && wait "$mz_pid"; } 2>/dev/null

	echo $ir $er
	# Return rather than exit, so that a direct call (outside of a command
	# substitution) cannot terminate the whole test script.
	return $ret
}
 | 
			
		||||
 | 
			
		||||
# Compare UC throughput with and without a concurrent broadcast flood.
#
# First measures the UC egress rate on its own, then starts a broadcast
# stream from $h1 and measures the UC rate again. The test fails if either
# measurement could not reach the required ingress rate, or if the UC egress
# rate degraded by more than 10 %. A report of all measured rates is printed
# after the test result.
test_mc_aware()
{
	RET=0

	local -a uc_rate
	uc_rate=($(measure_uc_rate "UC-only"))
	check_err $? "Could not get high enough UC-only ingress rate"
	local ucth1=${uc_rate[1]}

	mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
	# Remember exactly which process was started, so that only this one is
	# killed below.
	local bc_pid=$!

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	local -a uc_rate_2
	uc_rate_2=($(measure_uc_rate "UC+MC"))
	check_err $? "Could not get high enough UC+MC ingress rate"
	local ucth2=${uc_rate_2[1]}

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	# Relative drop of UC egress rate in percent, clamped at 0 from below.
	local deg=$(bc <<< "
			scale=2
			ret = 100 * ($ucth1 - $ucth2) / $ucth1
			if (ret > 0) { ret } else { 0 }
		    ")

	# If the degradation could not be computed (e.g. zero UC-only rate), bc
	# prints nothing. Treat that as a failure instead of silently passing.
	local degraded
	degraded=$(bc <<< "$deg > 10" 2>/dev/null)
	check_err "${degraded:-1}" "UC throughput degraded by more than 10 %"

	local interval=$((d1 - d0))
	local mc_ir=$(rate $u0 $u1 $interval)
	local mc_er=$(rate $t0 $t1 $interval)

	# Suppress noise from killing mausezahn.
	{ kill "$bc_pid" && wait "$bc_pid"; } 2>/dev/null

	log_test "UC performance under MC overload"

	echo "UC-only throughput  $(humanize $ucth1)"
	echo "UC+MC throughput    $(humanize $ucth2)"
	echo "Degradation         $deg %"
	echo
	echo "Full report:"
	echo "  UC only:"
	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
	echo "  UC+MC:"
	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
	echo "    ingress MC throughput $(humanize $mc_ir)"
	echo "    egress MC throughput  $(humanize $mc_er)"
}
 | 
			
		||||
 | 
			
		||||
# Make sure the topology is torn down whichever way the script ends.
trap 'cleanup' EXIT

# Build the topology, then wait before running the tests.
setup_prepare
setup_wait

tests_run

exit ${EXIT_STATUS}
 | 
			
		||||
		Loading…
	
		Reference in a new issue