remarkable-linux/net/ipv4/sysctl_net_ipv4.c
Hannes Frederic Sowa f87c10a8aa ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing
While forwarding we should not use the protocol path mtu to calculate
the mtu for a forwarded packet but instead use the interface mtu.

We mark forwarded skbs in ip_forward with IPSKB_FORWARDED, which was
introduced for multicast forwarding. But as it does not conflict with
our usage in unicast code path it is perfect for reuse.

I moved the functions ip_sk_accept_pmtu, ip_sk_use_pmtu and ip_skb_dst_mtu
along with the new ip_dst_mtu_maybe_forward to net/ip.h to fix circular
dependencies because of IPSKB_FORWARDED.

Because someone might have written a software which does probe
destinations manually and expects the kernel to honour those path mtus
I introduced a new per-namespace "ip_forward_use_pmtu" knob so someone
can disable this new behaviour. We also still use mtus which are locked on a
route for forwarding.

The reason for this change is, that path mtus information can be injected
into the kernel via e.g. icmp_err protocol handler without verification
of local sockets. As such, this could cause the IPv4 forwarding path to
wrongfully emit fragmentation needed notifications or start to fragment
packets along a path.

Tunnel and ipsec output paths clear IPCB again, thus IPSKB_FORWARDED
won't be set and further fragmentation logic will use the path mtu to
determine the fragmentation size. They also recheck packet size with
help of path mtu discovery and report appropriate errors.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: John Heffner <johnwheffner@gmail.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-01-13 11:22:54 -08:00

929 lines
22 KiB
C

/*
* sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
*
* Begun April 1, 1996, Mike Shaver.
* Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/seqlock.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
#include <linux/swap.h>
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
#include <net/tcp_memcontrol.h>
static int zero;
static int one = 1;
static int four = 4;
static int gso_max_segs = GSO_MAX_SEGS;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
{
write_seqlock(&net->ipv4.sysctl_local_ports.lock);
net->ipv4.sysctl_local_ports.range[0] = range[0];
net->ipv4.sysctl_local_ports.range[1] = range[1];
write_sequnlock(&net->ipv4.sysctl_local_ports.lock);
}
/* Validate changes from /proc interface. */
static int ipv4_local_port_range(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
struct net *net =
container_of(table->data, struct net, ipv4.sysctl_local_ports.range);
int ret;
int range[2];
struct ctl_table tmp = {
.data = &range,
.maxlen = sizeof(range),
.mode = table->mode,
.extra1 = &ip_local_port_range_min,
.extra2 = &ip_local_port_range_max,
};
inet_get_local_port_range(net, &range[0], &range[1]);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
if (range[1] < range[0])
ret = -EINVAL;
else
set_local_port_range(net, range);
}
return ret;
}
static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
{
kgid_t *data = table->data;
struct net *net =
container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
unsigned int seq;
do {
seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
*low = data[0];
*high = data[1];
} while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
}
/* Update system visible IP port range */
static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
{
kgid_t *data = table->data;
struct net *net =
container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
write_seqlock(&net->ipv4.sysctl_local_ports.lock);
data[0] = low;
data[1] = high;
write_sequnlock(&net->ipv4.sysctl_local_ports.lock);
}
/* Validate changes from /proc interface. */
static int ipv4_ping_group_range(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
struct user_namespace *user_ns = current_user_ns();
int ret;
gid_t urange[2];
kgid_t low, high;
struct ctl_table tmp = {
.data = &urange,
.maxlen = sizeof(urange),
.mode = table->mode,
.extra1 = &ip_ping_group_range_min,
.extra2 = &ip_ping_group_range_max,
};
inet_get_ping_group_range_table(table, &low, &high);
urange[0] = from_kgid_munged(user_ns, low);
urange[1] = from_kgid_munged(user_ns, high);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
low = make_kgid(user_ns, urange[0]);
high = make_kgid(user_ns, urange[1]);
if (!gid_valid(low) || !gid_valid(high) ||
(urange[1] < urange[0]) || gid_lt(high, low)) {
low = make_kgid(&init_user_ns, 1);
high = make_kgid(&init_user_ns, 0);
}
set_ping_group_range(table, low, high);
}
return ret;
}
static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char val[TCP_CA_NAME_MAX];
struct ctl_table tbl = {
.data = val,
.maxlen = TCP_CA_NAME_MAX,
};
int ret;
tcp_get_default_congestion_control(val);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
ret = tcp_set_default_congestion_control(val);
return ret;
}
static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
int ret;
tbl.data = kmalloc(tbl.maxlen, GFP_USER);
if (!tbl.data)
return -ENOMEM;
tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
kfree(tbl.data);
return ret;
}
static int proc_allowed_congestion_control(struct ctl_table *ctl,
int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
int ret;
tbl.data = kmalloc(tbl.maxlen, GFP_USER);
if (!tbl.data)
return -ENOMEM;
tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
ret = tcp_set_allowed_congestion_control(tbl.data);
kfree(tbl.data);
return ret;
}
static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
int ret;
u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
if (!tbl.data)
return -ENOMEM;
rcu_read_lock();
ctxt = rcu_dereference(tcp_fastopen_ctx);
if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
memset(user_key, 0, sizeof(user_key));
rcu_read_unlock();
snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
user_key[0], user_key[1], user_key[2], user_key[3]);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0) {
if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
user_key + 2, user_key + 3) != 4) {
ret = -EINVAL;
goto bad_key;
}
/* Generate a dummy secret but don't publish it. This
* is needed so we don't regenerate a new key on the
* first invocation of tcp_fastopen_cookie_gen
*/
tcp_fastopen_init_key_once(false);
tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
user_key[0], user_key[1], user_key[2], user_key[3],
(char *)tbl.data, ret);
kfree(tbl.data);
return ret;
}
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
.data = &sysctl_tcp_timestamps,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_window_scaling",
.data = &sysctl_tcp_window_scaling,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_sack",
.data = &sysctl_tcp_sack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_retrans_collapse",
.data = &sysctl_tcp_retrans_collapse,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_default_ttl",
.data = &sysctl_ip_default_ttl,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &ip_ttl_min,
.extra2 = &ip_ttl_max,
},
{
.procname = "ip_nonlocal_bind",
.data = &sysctl_ip_nonlocal_bind,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_syn_retries",
.data = &sysctl_tcp_syn_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &tcp_syn_retries_min,
.extra2 = &tcp_syn_retries_max
},
{
.procname = "tcp_synack_retries",
.data = &sysctl_tcp_synack_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_max_orphans",
.data = &sysctl_tcp_max_orphans,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_max_tw_buckets",
.data = &tcp_death_row.sysctl_max_tw_buckets,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_early_demux",
.data = &sysctl_ip_early_demux,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_dynaddr",
.data = &sysctl_ip_dynaddr,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_keepalive_time",
.data = &sysctl_tcp_keepalive_time,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "tcp_keepalive_probes",
.data = &sysctl_tcp_keepalive_probes,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_keepalive_intvl",
.data = &sysctl_tcp_keepalive_intvl,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "tcp_retries1",
.data = &sysctl_tcp_retries1,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra2 = &tcp_retr1_max
},
{
.procname = "tcp_retries2",
.data = &sysctl_tcp_retries2,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_fin_timeout",
.data = &sysctl_tcp_fin_timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
#ifdef CONFIG_SYN_COOKIES
{
.procname = "tcp_syncookies",
.data = &sysctl_tcp_syncookies,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#endif
{
.procname = "tcp_fastopen",
.data = &sysctl_tcp_fastopen,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_fastopen_key",
.mode = 0600,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
.proc_handler = proc_tcp_fastopen_key,
},
{
.procname = "tcp_tw_recycle",
.data = &tcp_death_row.sysctl_tw_recycle,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_abort_on_overflow",
.data = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_stdurg",
.data = &sysctl_tcp_stdurg,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_rfc1337",
.data = &sysctl_tcp_rfc1337,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_max_syn_backlog",
.data = &sysctl_max_syn_backlog,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_local_reserved_ports",
.data = NULL, /* initialized in sysctl_ipv4_init */
.maxlen = 65536,
.mode = 0644,
.proc_handler = proc_do_large_bitmap,
},
{
.procname = "igmp_max_memberships",
.data = &sysctl_igmp_max_memberships,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "igmp_max_msf",
.data = &sysctl_igmp_max_msf,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "inet_peer_threshold",
.data = &inet_peer_threshold,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "inet_peer_minttl",
.data = &inet_peer_minttl,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "inet_peer_maxttl",
.data = &inet_peer_maxttl,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "tcp_orphan_retries",
.data = &sysctl_tcp_orphan_retries,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_fack",
.data = &sysctl_tcp_fack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_reordering",
.data = &sysctl_tcp_reordering,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_dsack",
.data = &sysctl_tcp_dsack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_mem",
.maxlen = sizeof(sysctl_tcp_mem),
.data = &sysctl_tcp_mem,
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "tcp_wmem",
.data = &sysctl_tcp_wmem,
.maxlen = sizeof(sysctl_tcp_wmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{
.procname = "tcp_notsent_lowat",
.data = &sysctl_tcp_notsent_lowat,
.maxlen = sizeof(sysctl_tcp_notsent_lowat),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_rmem",
.data = &sysctl_tcp_rmem,
.maxlen = sizeof(sysctl_tcp_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
{
.procname = "tcp_app_win",
.data = &sysctl_tcp_app_win,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_adv_win_scale",
.data = &sysctl_tcp_adv_win_scale,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &tcp_adv_win_scale_min,
.extra2 = &tcp_adv_win_scale_max,
},
{
.procname = "tcp_tw_reuse",
.data = &sysctl_tcp_tw_reuse,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_frto",
.data = &sysctl_tcp_frto,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_no_metrics_save",
.data = &sysctl_tcp_nometrics_save,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_moderate_rcvbuf",
.data = &sysctl_tcp_moderate_rcvbuf,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_tso_win_divisor",
.data = &sysctl_tcp_tso_win_divisor,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_congestion_control",
.mode = 0644,
.maxlen = TCP_CA_NAME_MAX,
.proc_handler = proc_tcp_congestion_control,
},
{
.procname = "tcp_mtu_probing",
.data = &sysctl_tcp_mtu_probing,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_base_mss",
.data = &sysctl_tcp_base_mss,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_workaround_signed_windows",
.data = &sysctl_tcp_workaround_signed_windows,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_limit_output_bytes",
.data = &sysctl_tcp_limit_output_bytes,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_challenge_ack_limit",
.data = &sysctl_tcp_challenge_ack_limit,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#ifdef CONFIG_NET_DMA
{
.procname = "tcp_dma_copybreak",
.data = &sysctl_tcp_dma_copybreak,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#endif
{
.procname = "tcp_slow_start_after_idle",
.data = &sysctl_tcp_slow_start_after_idle,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
#ifdef CONFIG_NETLABEL
{
.procname = "cipso_cache_enable",
.data = &cipso_v4_cache_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "cipso_cache_bucket_size",
.data = &cipso_v4_cache_bucketsize,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "cipso_rbm_optfmt",
.data = &cipso_v4_rbm_optfmt,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "cipso_rbm_strictvalid",
.data = &cipso_v4_rbm_strictvalid,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif /* CONFIG_NETLABEL */
{
.procname = "tcp_available_congestion_control",
.maxlen = TCP_CA_BUF_MAX,
.mode = 0444,
.proc_handler = proc_tcp_available_congestion_control,
},
{
.procname = "tcp_allowed_congestion_control",
.maxlen = TCP_CA_BUF_MAX,
.mode = 0644,
.proc_handler = proc_allowed_congestion_control,
},
{
.procname = "tcp_thin_linear_timeouts",
.data = &sysctl_tcp_thin_linear_timeouts,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_thin_dupack",
.data = &sysctl_tcp_thin_dupack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_early_retrans",
.data = &sysctl_tcp_early_retrans,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &four,
},
{
.procname = "tcp_min_tso_segs",
.data = &sysctl_tcp_min_tso_segs,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &gso_max_segs,
},
{
.procname = "tcp_autocorking",
.data = &sysctl_tcp_autocorking,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "udp_rmem_min",
.data = &sysctl_udp_rmem_min,
.maxlen = sizeof(sysctl_udp_rmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one
},
{
.procname = "udp_wmem_min",
.data = &sysctl_udp_wmem_min,
.maxlen = sizeof(sysctl_udp_wmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &one
},
{ }
};
static struct ctl_table ipv4_net_table[] = {
{
.procname = "icmp_echo_ignore_all",
.data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "icmp_echo_ignore_broadcasts",
.data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "icmp_ignore_bogus_error_responses",
.data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "icmp_errors_use_inbound_ifaddr",
.data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "icmp_ratelimit",
.data = &init_net.ipv4.sysctl_icmp_ratelimit,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
{
.procname = "icmp_ratemask",
.data = &init_net.ipv4.sysctl_icmp_ratemask,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ping_group_range",
.data = &init_net.ipv4.sysctl_ping_group_range,
.maxlen = sizeof(gid_t)*2,
.mode = 0644,
.proc_handler = ipv4_ping_group_range,
},
{
.procname = "tcp_ecn",
.data = &init_net.ipv4.sysctl_tcp_ecn,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_local_port_range",
.maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range),
.data = &init_net.ipv4.sysctl_local_ports.range,
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
{
.procname = "ip_no_pmtu_disc",
.data = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "ip_forward_use_pmtu",
.data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
};
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
struct ctl_table *table;
table = ipv4_net_table;
if (!net_eq(net, &init_net)) {
int i;
table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
if (table == NULL)
goto err_alloc;
/* Update the variables to point into the current struct net */
for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++)
table[i].data += (void *)net - (void *)&init_net;
}
/*
* Sane defaults - nobody may create ping sockets.
* Boot scripts should set this to distro-specific group.
*/
net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1);
net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0);
/*
* Set defaults for local port range
*/
seqlock_init(&net->ipv4.sysctl_local_ports.lock);
net->ipv4.sysctl_local_ports.range[0] = 32768;
net->ipv4.sysctl_local_ports.range[1] = 61000;
net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
if (net->ipv4.ipv4_hdr == NULL)
goto err_reg;
return 0;
err_reg:
if (!net_eq(net, &init_net))
kfree(table);
err_alloc:
return -ENOMEM;
}
static __net_exit void ipv4_sysctl_exit_net(struct net *net)
{
struct ctl_table *table;
table = net->ipv4.ipv4_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
kfree(table);
}
static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
.init = ipv4_sysctl_init_net,
.exit = ipv4_sysctl_exit_net,
};
static __init int sysctl_ipv4_init(void)
{
struct ctl_table_header *hdr;
struct ctl_table *i;
for (i = ipv4_table; i->procname; i++) {
if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
i->data = sysctl_local_reserved_ports;
break;
}
}
if (!i->procname)
return -EINVAL;
hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
if (hdr == NULL)
return -ENOMEM;
if (register_pernet_subsys(&ipv4_sysctl_ops)) {
unregister_net_sysctl_table(hdr);
return -ENOMEM;
}
return 0;
}
__initcall(sysctl_ipv4_init);