From 8a6d2ea0cd121e3bfff4dbce5bc111874cf9e9d2 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 22 Jul 2008 21:53:40 -0700 Subject: [PATCH 1/8] sky2: don't stop queue on shutdown It is unnecessary, to stop queue and turn off carrier in shutdown routine. With new netdev_queue this causes warnings. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/sky2.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 711e4a8948e0..5257cf464f1a 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1829,9 +1829,6 @@ static int sky2_down(struct net_device *dev) if (netif_msg_ifdown(sky2)) printk(KERN_INFO PFX "%s: disabling interface\n", dev->name); - /* Stop more packets from being queued */ - netif_stop_queue(dev); - /* Disable port IRQ */ imask = sky2_read32(hw, B0_IMSK); imask &= ~portirq_msk[port]; @@ -1887,8 +1884,6 @@ static int sky2_down(struct net_device *dev) sky2_phy_power_down(hw, port); - netif_carrier_off(dev); - /* turn off LED's */ sky2_write16(hw, B0_Y2LED, LED_STAT_OFF); From 5b3ab1dbd401b36ba2f9bfee2d2dae252fd62cd8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Jul 2008 14:01:29 -0700 Subject: [PATCH 2/8] netdev: Remove warning from __netif_schedule(). It isn't helping anything and we aren't going to be able to change all the drivers that do queue wakeups in strange situations. Just letting a noop_qdisc get scheduled will work because when qdisc_run() executes via net_tx_work() it will simply find no packets pending when it makes the ->dequeue() call in qdisc_restart. Signed-off-by: David S. Miller --- net/core/dev.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6bf217da9d8f..ccf97f9f37eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1341,9 +1341,6 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) void __netif_schedule(struct Qdisc *q) { - if (WARN_ON_ONCE(q == &noop_qdisc)) - return; - if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) { struct softnet_data *sd; unsigned long flags; From b4942af65028c5eb516fdd9053020ccb2ee186ce Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 23 Jul 2008 14:06:04 -0700 Subject: [PATCH 3/8] net: Update entry in af_family_clock_key_strings In the merge phase of the CAN subsystem the af_family_clock_key_strings[] have been added to sock.c in commit 443aef0eddfa44c158d1b94ebb431a70638fcab4 (lockdep: fixup sk_callback_lock annotation). This trivial patch adds the missing name for address family 29 (AF_CAN). Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 10a64d57078c..91f8bbc93526 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -180,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , - "clock-27" , "clock-28" , "clock-29" , + "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_MAX" }; From e8ebe3b893792887317bc24cc4608753f81b81d3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 23 Jul 2008 15:30:52 -0700 Subject: [PATCH 4/8] e1000e: fix e1000_netpoll(), remove extraneous e1000_clean_tx_irq() call Evgeniy Polyakov noticed that drivers/net/e1000e/netdev.c:e1000_netpoll() was calling e1000_clean_tx_irq() without taking the TX lock. David Miller suggested to remove the call altogether: since in this callpah there's periodic calls to ->poll() anyway which will do e1000_clean_tx_irq() and will garbage-collect any finished TX ring descriptors. This fix solved the e1000e+netconsole crashes i've been seeing: ============================================================================= BUG skbuff_head_cache: Poison overwritten ----------------------------------------------------------------------------- INFO: 0xf658ae9c-0xf658ae9c. First byte 0x6a instead of 0x6b INFO: Allocated in __alloc_skb+0x2c/0x110 age=0 cpu=0 pid=5098 INFO: Freed in __kfree_skb+0x31/0x80 age=0 cpu=1 pid=4440 INFO: Slab 0xc16cc140 objects=16 used=1 fp=0xf658ae00 flags=0x400000c3 INFO: Object 0xf658ae00 @offset=3584 fp=0xf658af00 Signed-off-by: Ingo Molnar Signed-off-by: David S. Miller --- drivers/net/e1000e/netdev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 869544b8c05c..9c0f56b3c518 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4067,8 +4067,6 @@ static void e1000_netpoll(struct net_device *netdev) disable_irq(adapter->pdev->irq); e1000_intr(adapter->pdev->irq, netdev); - e1000_clean_tx_irq(adapter); - enable_irq(adapter->pdev->irq); } #endif From 4b53fb67e385b856a991d402096379dab462170a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Jul 2008 16:38:45 -0700 Subject: [PATCH 5/8] tcp: Clear probes_out more aggressively in tcp_ack(). This is based upon an excellent bug report from Eric Dumazet. tcp_ack() should clear ->icsk_probes_out even if there are packets outstanding. Otherwise if we get a sequence of ACKs while we do have packets outstanding over and over again, we'll never clear the probes_out value and eventually think the connection is too sick and we'll reset it. This appears to be some "optimization" added to tcp_ack() in the 2.4.x timeframe. In 2.2.x, probes_out is pretty much always cleared by tcp_ack(). Here is Eric's original report: ---------------------------------------- Apparently, we can in some situations reset TCP connections in a couple of seconds when some frames are lost. In order to reproduce the problem, please try the following program on linux-2.6.25.* Setup some iptables rules to allow two frames per second sent on loopback interface to tcp destination port 12000 iptables -N SLOWLO iptables -A SLOWLO -m hashlimit --hashlimit 2 --hashlimit-burst 1 --hashlimit-mode dstip --hashlimit-name slow2 -j ACCEPT iptables -A SLOWLO -j DROP iptables -A OUTPUT -o lo -p tcp --dport 12000 -j SLOWLO Then run the attached program and see the output : # ./loop State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,1) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,3) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,5) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,7) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,9) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,200ms,11) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,201ms,13) State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 40 127.0.0.1:54455 127.0.0.1:12000 timer:(persist,188ms,15) write(): Connection timed out wrote 890 bytes but was interrupted after 9 seconds ESTAB 0 0 127.0.0.1:12000 127.0.0.1:54455 Exiting read() because no data available (4000 ms timeout). read 860 bytes While this tcp session makes progress (sending frames with 50 bytes of payload, every 500ms), linux tcp stack decides to reset it, when tcp_retries 2 is reached (default value : 15) tcpdump : 15:30:28.856695 IP 127.0.0.1.56554 > 127.0.0.1.12000: S 33788768:33788768(0) win 32792 15:30:28.856711 IP 127.0.0.1.12000 > 127.0.0.1.56554: S 33899253:33899253(0) ack 33788769 win 32792 15:30:29.356947 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 1:61(60) ack 1 win 257 15:30:29.356966 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 61 win 257 15:30:29.866415 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 61:111(50) ack 1 win 257 15:30:29.866427 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 111 win 257 15:30:30.366516 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 111:161(50) ack 1 win 257 15:30:30.366527 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 161 win 257 15:30:30.876196 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 161:211(50) ack 1 win 257 15:30:30.876207 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 211 win 257 15:30:31.376282 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 211:261(50) ack 1 win 257 15:30:31.376290 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 261 win 257 15:30:31.885619 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 261:311(50) ack 1 win 257 15:30:31.885631 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 311 win 257 15:30:32.385705 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 311:361(50) ack 1 win 257 15:30:32.385715 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 361 win 257 15:30:32.895249 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 361:411(50) ack 1 win 257 15:30:32.895266 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 411 win 257 15:30:33.395341 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 411:461(50) ack 1 win 257 15:30:33.395351 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 461 win 257 15:30:33.918085 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 461:511(50) ack 1 win 257 15:30:33.918096 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 511 win 257 15:30:34.418163 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 511:561(50) ack 1 win 257 15:30:34.418172 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 561 win 257 15:30:34.927685 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 561:611(50) ack 1 win 257 15:30:34.927698 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 611 win 257 15:30:35.427757 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 611:661(50) ack 1 win 257 15:30:35.427766 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 661 win 257 15:30:35.937359 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 661:711(50) ack 1 win 257 15:30:35.937376 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 711 win 257 15:30:36.437451 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 711:761(50) ack 1 win 257 15:30:36.437464 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 761 win 257 15:30:36.947022 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 761:811(50) ack 1 win 257 15:30:36.947039 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 811 win 257 15:30:37.447135 IP 127.0.0.1.56554 > 127.0.0.1.12000: P 811:861(50) ack 1 win 257 15:30:37.447203 IP 127.0.0.1.12000 > 127.0.0.1.56554: . ack 861 win 257 15:30:41.448171 IP 127.0.0.1.12000 > 127.0.0.1.56554: F 1:1(0) ack 861 win 257 15:30:41.448189 IP 127.0.0.1.56554 > 127.0.0.1.12000: R 33789629:33789629(0) win 0 Source of program : /* * small producer/consumer program. * setup a listener on 127.0.0.1:12000 * Forks a child * child connect to 127.0.0.1, and sends 10 bytes on this tcp socket every 100 ms * Father accepts connection, and read all data */ #include #include #include #include #include #include #include int port = 12000; char buffer[4096]; int main(int argc, char *argv[]) { int lfd = socket(AF_INET, SOCK_STREAM, 0); struct sockaddr_in socket_address; time_t t0, t1; int on = 1, sfd, res; unsigned long total = 0; socklen_t alen = sizeof(socket_address); pid_t pid; time(&t0); socket_address.sin_family = AF_INET; socket_address.sin_port = htons(port); socket_address.sin_addr.s_addr = htonl(INADDR_LOOPBACK); if (lfd == -1) { perror("socket()"); return 1; } setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)); if (bind(lfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) { perror("bind"); close(lfd); return 1; } if (listen(lfd, 1) == -1) { perror("listen()"); close(lfd); return 1; } pid = fork(); if (pid == 0) { int i, cfd = socket(AF_INET, SOCK_STREAM, 0); close(lfd); if (connect(cfd, (struct sockaddr *)&socket_address, sizeof(socket_address)) == -1) { perror("connect()"); return 1; } for (i = 0 ; ;) { res = write(cfd, "blablabla\n", 10); if (res > 0) total += res; else if (res == -1) { perror("write()"); break; } else break; usleep(100000); if (++i == 10) { system("ss -on dst 127.0.0.1:12000"); i = 0; } } time(&t1); fprintf(stderr, "wrote %lu bytes but was interrupted after %g seconds\n", total, difftime(t1, t0)); system("ss -on | grep 127.0.0.1:12000"); close(cfd); return 0; } sfd = accept(lfd, (struct sockaddr *)&socket_address, &alen); if (sfd == -1) { perror("accept"); return 1; } close(lfd); while (1) { struct pollfd pfd[1]; pfd[0].fd = sfd; pfd[0].events = POLLIN; if (poll(pfd, 1, 4000) == 0) { fprintf(stderr, "Exiting read() because no data available (4000 ms timeout).\n"); break; } res = read(sfd, buffer, sizeof(buffer)); if (res > 0) total += res; else if (res == 0) break; else perror("read()"); } fprintf(stderr, "read %lu bytes\n", total); close(sfd); return 0; } ---------------------------------------- Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1f5e6049883e..75efd244f2af 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3292,6 +3292,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) * log. Something worked... */ sk->sk_err_soft = 0; + icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_time_stamp; prior_packets = tp->packets_out; if (!prior_packets) @@ -3324,8 +3325,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) return 1; no_queue: - icsk->icsk_probes_out = 0; - /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. From 70eed75d76635ba7350651b9bd96529a306ec67a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 23 Jul 2008 16:42:42 -0700 Subject: [PATCH 6/8] netfilter: make security table depend on NETFILTER_ADVANCED Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/Kconfig | 2 +- net/ipv6/netfilter/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f23e60c93ef9..90eb7cb47e77 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -369,7 +369,7 @@ config IP_NF_SECURITY tristate "Security table" depends on IP_NF_IPTABLES depends on SECURITY - default m if NETFILTER_ADVANCED=n + depends on NETFILTER_ADVANCED help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 689dec899c57..0cfcce7b18d8 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -213,7 +213,7 @@ config IP6_NF_SECURITY tristate "Security table" depends on IP6_NF_IPTABLES depends on SECURITY - default m if NETFILTER_ADVANCED=n + depends on NETFILTER_ADVANCED help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. From 6f75a9b6426e686649ac440c37ec7c249501f9a5 Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Wed, 23 Jul 2008 20:29:21 -0700 Subject: [PATCH 7/8] atm: [fore200e] use MODULE_FIRMWARE() and other suggested cleanups Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- drivers/atm/fore200e.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index d5c1bbfbe79d..73338d231db9 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2562,7 +2562,8 @@ fore200e_load_and_start_fw(struct fore200e* fore200e) const struct firmware *firmware; struct device *device; struct fw_header *fw_header; - u32 *fw_data, fw_size; + const __le32 *fw_data; + u32 fw_size; u32 __iomem *load_addr; char buf[48]; int err = -ENODEV; @@ -2582,7 +2583,7 @@ fore200e_load_and_start_fw(struct fore200e* fore200e) return err; } - fw_data = (u32 *) firmware->data; + fw_data = (__le32 *) firmware->data; fw_size = firmware->size / sizeof(u32); fw_header = (struct fw_header *) firmware->data; load_addr = fore200e->virt_base + le32_to_cpu(fw_header->load_offset); @@ -3199,6 +3200,14 @@ static const struct fore200e_bus fore200e_bus[] = { {} }; -#ifdef MODULE_LICENSE MODULE_LICENSE("GPL"); +#ifdef CONFIG_PCI +#ifdef __LITTLE_ENDIAN__ +MODULE_FIRMWARE("pca200e.bin"); +#else +MODULE_FIRMWARE("pca200e_ecd.bin2"); +#endif +#endif /* CONFIG_PCI */ +#ifdef CONFIG_SBUS +MODULE_FIRMWARE("sba200e_ecd.bin2"); #endif From f867e6af94239a04ec23aeec2fcda5aa58e41db7 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 23 Jul 2008 21:34:27 -0700 Subject: [PATCH 8/8] pkt_sched: sch_sfq: dump a real number of flows Dump the "flows" number according to the number of active flows instead of repeating the "limit". Reported-by: Denys Fedoryshchenko Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 8589da666568..73f53844ce97 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -536,7 +536,14 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) opt.limit = q->limit; opt.divisor = SFQ_HASH_DIVISOR; - opt.flows = q->limit; + opt.flows = 0; + if (q->tail != SFQ_DEPTH) { + unsigned int i; + + for (i = 0; i < SFQ_HASH_DIVISOR; i++) + if (q->ht[i] != SFQ_DEPTH) + opt.flows++; + } NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);