mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	Implement support for processing the IOAM Pre-allocated Trace with IPv6, see [1] and [2]. Introduce a new IPv6 Hop-by-Hop TLV option, see IANA [3].

	A new per-interface sysctl is introduced. The value is a boolean to accept (=1) or ignore (=0, by default) IPv6 IOAM options on ingress for an interface:
	 - net.ipv6.conf.XXX.ioam6_enabled

	Two other sysctls are introduced to define IOAM IDs, represented by an integer. They are respectively per-namespace and per-interface:
	 - net.ipv6.ioam6_id
	 - net.ipv6.conf.XXX.ioam6_id

	The value of the first one represents the IOAM ID of the node itself (u32; max and default value = U32_MAX>>8, due to hop limit concatenation) while the other represents the IOAM ID of an interface (u16; max and default value = U16_MAX).

	Each "ioam6_id" sysctl has a "_wide" equivalent:
	 - net.ipv6.ioam6_id_wide
	 - net.ipv6.conf.XXX.ioam6_id_wide

	The value of the first one represents the wide IOAM ID of the node itself (u64; max and default value = U64_MAX>>8, due to hop limit concatenation) while the other represents the wide IOAM ID of an interface (u32; max and default value = U32_MAX).

	The use of short and wide equivalents is not exclusive, a deployment could choose to leverage both. For example, net.ipv6.conf.XXX.ioam6_id (short format) could be an identifier for a physical interface, whereas net.ipv6.conf.XXX.ioam6_id_wide (wide format) could be an identifier for a logical sub-interface. Documentation about new sysctls is provided at the end of this patchset.

	Two relativistic hash tables are used: one for IOAM namespaces, the other for IOAM schemas. A namespace can only have a single active schema and a schema can only be attached to a single namespace (1:1 relationship).

[1] https://tools.ietf.org/html/draft-ietf-ippm-ioam-ipv6-options
[2] https://tools.ietf.org/html/draft-ietf-ippm-ioam-data
[3] https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#ipv6-parameters-2

Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
		
			
				
	
	
		
			359 lines
		
	
	
	
		
			8.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			359 lines
		
	
	
	
		
			8.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0
 | 
						|
/*
 | 
						|
 * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
 | 
						|
 *
 | 
						|
 * Changes:
 | 
						|
 * YOSHIFUJI Hideaki @USAGI:	added icmp sysctl table.
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/mm.h>
 | 
						|
#include <linux/sysctl.h>
 | 
						|
#include <linux/in6.h>
 | 
						|
#include <linux/ipv6.h>
 | 
						|
#include <linux/slab.h>
 | 
						|
#include <linux/export.h>
 | 
						|
#include <net/ndisc.h>
 | 
						|
#include <net/ipv6.h>
 | 
						|
#include <net/addrconf.h>
 | 
						|
#include <net/inet_frag.h>
 | 
						|
#include <net/netevent.h>
 | 
						|
#include <net/ip_fib.h>
 | 
						|
#ifdef CONFIG_NETLABEL
 | 
						|
#include <net/calipso.h>
 | 
						|
#endif
 | 
						|
#include <linux/ioam6.h>
 | 
						|
 | 
						|
static int two = 2;
 | 
						|
static int three = 3;
 | 
						|
static int flowlabel_reflect_max = 0x7;
 | 
						|
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 | 
						|
static u32 rt6_multipath_hash_fields_all_mask =
 | 
						|
	FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
 | 
						|
static u32 ioam6_id_max = IOAM6_DEFAULT_ID;
 | 
						|
static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE;
 | 
						|
 | 
						|
/*
 * sysctl handler for fib_multipath_hash_policy.
 *
 * Delegates parsing/formatting to proc_dou8vec_minmax() and, after a
 * successful write, raises NETEVENT_IPV6_MPATH_HASH_UPDATE for the
 * namespace that owns the value.
 *
 * Returns whatever proc_dou8vec_minmax() returned.
 */
static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
					  void *buffer, size_t *lenp, loff_t *ppos)
{
	/* table->data points inside struct net; recover the enclosing net. */
	struct net *net = container_of(table->data, struct net,
				       ipv6.sysctl.multipath_hash_policy);
	int rc = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);

	/* Reads and failed writes do not generate a notification. */
	if (rc || !write)
		return rc;

	call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
	return 0;
}
 | 
						|
 | 
						|
static int
 | 
						|
proc_rt6_multipath_hash_fields(struct ctl_table *table, int write, void *buffer,
 | 
						|
			       size_t *lenp, loff_t *ppos)
 | 
						|
{
 | 
						|
	struct net *net;
 | 
						|
	int ret;
 | 
						|
 | 
						|
	net = container_of(table->data, struct net,
 | 
						|
			   ipv6.sysctl.multipath_hash_fields);
 | 
						|
	ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
 | 
						|
	if (write && ret == 0)
 | 
						|
		call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
 | 
						|
 | 
						|
	return ret;
 | 
						|
}
 | 
						|
 | 
						|
static struct ctl_table ipv6_table_template[] = {
 | 
						|
	{
 | 
						|
		.procname	= "bindv6only",
 | 
						|
		.data		= &init_net.ipv6.sysctl.bindv6only,
 | 
						|
		.maxlen		= sizeof(u8),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dou8vec_minmax,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "anycast_src_echo_reply",
 | 
						|
		.data		= &init_net.ipv6.sysctl.anycast_src_echo_reply,
 | 
						|
		.maxlen		= sizeof(u8),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dou8vec_minmax,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "flowlabel_consistency",
 | 
						|
		.data		= &init_net.ipv6.sysctl.flowlabel_consistency,
 | 
						|
		.maxlen		= sizeof(u8),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dou8vec_minmax,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "auto_flowlabels",
 | 
						|
		.data		= &init_net.ipv6.sysctl.auto_flowlabels,
 | 
						|
		.maxlen		= sizeof(u8),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dou8vec_minmax,
 | 
						|
		.extra2		= &auto_flowlabels_max
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "fwmark_reflect",
 | 
						|
		.data		= &init_net.ipv6.sysctl.fwmark_reflect,
 | 
						|
		.maxlen		= sizeof(u8),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dou8vec_minmax,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "idgen_retries",
 | 
						|
		.data		= &init_net.ipv6.sysctl.idgen_retries,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "idgen_delay",
 | 
						|
		.data		= &init_net.ipv6.sysctl.idgen_delay,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec_jiffies,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "flowlabel_state_ranges",
 | 
						|
		.data		= &init_net.ipv6.sysctl.flowlabel_state_ranges,
 | 
						|
		.maxlen		= sizeof(u8),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dou8vec_minmax,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "ip_nonlocal_bind",
 | 
						|
		.data		= &init_net.ipv6.sysctl.ip_nonlocal_bind,
 | 
						|
		.maxlen		= sizeof(u8),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dou8vec_minmax,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "flowlabel_reflect",
 | 
						|
		.data		= &init_net.ipv6.sysctl.flowlabel_reflect,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec_minmax,
 | 
						|
		.extra1		= SYSCTL_ZERO,
 | 
						|
		.extra2		= &flowlabel_reflect_max,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "max_dst_opts_number",
 | 
						|
		.data		= &init_net.ipv6.sysctl.max_dst_opts_cnt,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "max_hbh_opts_number",
 | 
						|
		.data		= &init_net.ipv6.sysctl.max_hbh_opts_cnt,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "max_dst_opts_length",
 | 
						|
		.data		= &init_net.ipv6.sysctl.max_dst_opts_len,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "max_hbh_length",
 | 
						|
		.data		= &init_net.ipv6.sysctl.max_hbh_opts_len,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "fib_multipath_hash_policy",
 | 
						|
		.data		= &init_net.ipv6.sysctl.multipath_hash_policy,
 | 
						|
		.maxlen		= sizeof(u8),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler   = proc_rt6_multipath_hash_policy,
 | 
						|
		.extra1		= SYSCTL_ZERO,
 | 
						|
		.extra2		= &three,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "fib_multipath_hash_fields",
 | 
						|
		.data		= &init_net.ipv6.sysctl.multipath_hash_fields,
 | 
						|
		.maxlen		= sizeof(u32),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_rt6_multipath_hash_fields,
 | 
						|
		.extra1		= SYSCTL_ONE,
 | 
						|
		.extra2		= &rt6_multipath_hash_fields_all_mask,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "seg6_flowlabel",
 | 
						|
		.data		= &init_net.ipv6.sysctl.seg6_flowlabel,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "fib_notify_on_flag_change",
 | 
						|
		.data		= &init_net.ipv6.sysctl.fib_notify_on_flag_change,
 | 
						|
		.maxlen		= sizeof(u8),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dou8vec_minmax,
 | 
						|
		.extra1         = SYSCTL_ZERO,
 | 
						|
		.extra2         = &two,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "ioam6_id",
 | 
						|
		.data		= &init_net.ipv6.sysctl.ioam6_id,
 | 
						|
		.maxlen		= sizeof(u32),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_douintvec_minmax,
 | 
						|
		.extra2		= &ioam6_id_max,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "ioam6_id_wide",
 | 
						|
		.data		= &init_net.ipv6.sysctl.ioam6_id_wide,
 | 
						|
		.maxlen		= sizeof(u64),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_doulongvec_minmax,
 | 
						|
		.extra2		= &ioam6_id_wide_max,
 | 
						|
	},
 | 
						|
	{ }
 | 
						|
};
 | 
						|
 | 
						|
static struct ctl_table ipv6_rotable[] = {
 | 
						|
	{
 | 
						|
		.procname	= "mld_max_msf",
 | 
						|
		.data		= &sysctl_mld_max_msf,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "mld_qrv",
 | 
						|
		.data		= &sysctl_mld_qrv,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec_minmax,
 | 
						|
		.extra1		= SYSCTL_ONE
 | 
						|
	},
 | 
						|
#ifdef CONFIG_NETLABEL
 | 
						|
	{
 | 
						|
		.procname	= "calipso_cache_enable",
 | 
						|
		.data		= &calipso_cache_enabled,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec,
 | 
						|
	},
 | 
						|
	{
 | 
						|
		.procname	= "calipso_cache_bucket_size",
 | 
						|
		.data		= &calipso_cache_bucketsize,
 | 
						|
		.maxlen		= sizeof(int),
 | 
						|
		.mode		= 0644,
 | 
						|
		.proc_handler	= proc_dointvec,
 | 
						|
	},
 | 
						|
#endif /* CONFIG_NETLABEL */
 | 
						|
	{ }
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Per-namespace setup of the IPv6 sysctl tree.
 *
 * Builds a private copy of ipv6_table_template for @net, obtains the route
 * and icmp tables from their respective init helpers, and registers the
 * three tables under "net/ipv6", "net/ipv6/route" and "net/ipv6/icmp".
 * The resulting headers are stored in net->ipv6.sysctl.
 *
 * Returns 0 on success.  Any failure undoes the steps already taken and
 * returns -ENOMEM.
 */
static int __net_init ipv6_sysctl_net_init(struct net *net)
{
	struct ctl_table *main_tbl, *route_tbl, *icmp_tbl;
	int idx;

	main_tbl = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
			   GFP_KERNEL);
	if (!main_tbl)
		return -ENOMEM;

	/*
	 * The template's .data pointers refer to init_net; shift each one by
	 * the distance between @net and init_net so it refers to the same
	 * field in @net.  The terminating entry (last slot) is left alone.
	 */
	for (idx = 0; idx < ARRAY_SIZE(ipv6_table_template) - 1; idx++)
		main_tbl[idx].data += (void *)net - (void *)&init_net;

	route_tbl = ipv6_route_sysctl_init(net);
	if (!route_tbl)
		goto free_main;

	icmp_tbl = ipv6_icmp_sysctl_init(net);
	if (!icmp_tbl)
		goto free_route;

	net->ipv6.sysctl.hdr = register_net_sysctl(net, "net/ipv6", main_tbl);
	if (!net->ipv6.sysctl.hdr)
		goto free_icmp;

	net->ipv6.sysctl.route_hdr =
		register_net_sysctl(net, "net/ipv6/route", route_tbl);
	if (!net->ipv6.sysctl.route_hdr)
		goto unreg_main;

	net->ipv6.sysctl.icmp_hdr =
		register_net_sysctl(net, "net/ipv6/icmp", icmp_tbl);
	if (!net->ipv6.sysctl.icmp_hdr)
		goto unreg_route;

	return 0;

	/* Unwind in reverse order of acquisition. */
unreg_route:
	unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
unreg_main:
	unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
free_icmp:
	kfree(icmp_tbl);
free_route:
	kfree(route_tbl);
free_main:
	kfree(main_tbl);
	return -ENOMEM;
}
 | 
						|
 | 
						|
/*
 * Per-namespace teardown: unregister the three sysctl tables recorded in
 * net->ipv6.sysctl and free the tables that were backing them.
 */
static void __net_exit ipv6_sysctl_net_exit(struct net *net)
{
	/*
	 * Fetch the table pointers from the headers before unregistering.
	 * NOTE(review): presumably the headers must not be dereferenced once
	 * they have been unregistered - confirm before reordering.
	 */
	struct ctl_table *main_tbl = net->ipv6.sysctl.hdr->ctl_table_arg;
	struct ctl_table *route_tbl = net->ipv6.sysctl.route_hdr->ctl_table_arg;
	struct ctl_table *icmp_tbl = net->ipv6.sysctl.icmp_hdr->ctl_table_arg;

	/* Reverse of the registration order used by ipv6_sysctl_net_init(). */
	unregister_net_sysctl_table(net->ipv6.sysctl.icmp_hdr);
	unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
	unregister_net_sysctl_table(net->ipv6.sysctl.hdr);

	kfree(main_tbl);
	kfree(route_tbl);
	kfree(icmp_tbl);
}
 | 
						|
 | 
						|
static struct pernet_operations ipv6_sysctl_net_ops = {
 | 
						|
	.init = ipv6_sysctl_net_init,
 | 
						|
	.exit = ipv6_sysctl_net_exit,
 | 
						|
};
 | 
						|
 | 
						|
/* Header returned when ipv6_sysctl_register() registers ipv6_rotable. */
static struct ctl_table_header *ip6_header;
 | 
						|
 | 
						|
int ipv6_sysctl_register(void)
 | 
						|
{
 | 
						|
	int err = -ENOMEM;
 | 
						|
 | 
						|
	ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable);
 | 
						|
	if (!ip6_header)
 | 
						|
		goto out;
 | 
						|
 | 
						|
	err = register_pernet_subsys(&ipv6_sysctl_net_ops);
 | 
						|
	if (err)
 | 
						|
		goto err_pernet;
 | 
						|
out:
 | 
						|
	return err;
 | 
						|
 | 
						|
err_pernet:
 | 
						|
	unregister_net_sysctl_table(ip6_header);
 | 
						|
	goto out;
 | 
						|
}
 | 
						|
 | 
						|
void ipv6_sysctl_unregister(void)
 | 
						|
{
 | 
						|
	unregister_net_sysctl_table(ip6_header);
 | 
						|
	unregister_pernet_subsys(&ipv6_sysctl_net_ops);
 | 
						|
}
 |