Received: by oss.sgi.com id ; Mon, 19 Jun 2000 03:55:31 -0700 Received: from smtprch2.nortelnetworks.com ([192.135.215.15]:28384 "EHLO smtprch2.nortel.com") by oss.sgi.com with ESMTP id ; Mon, 19 Jun 2000 03:55:11 -0700 Received: from zrchb213.us.nortel.com (actually zrchb213) by smtprch2.nortel.com; Mon, 19 Jun 2000 05:51:30 -0500 Received: from zctwb003.asiapac.nortel.com ([47.152.32.111]) by zrchb213.us.nortel.com with SMTP (Microsoft Exchange Internet Mail Service Version 5.5.2650.21) id MPD1Y7R6; Mon, 19 Jun 2000 05:54:35 -0500 Received: from pwold011.asiapac.nortel.com ([47.181.193.45]) by zctwb003.asiapac.nortel.com with SMTP (Microsoft Exchange Internet Mail Service Version 5.5.2650.21) id NCLF8A9B; Mon, 19 Jun 2000 20:54:34 +1000 Received: from uow.edu.au (IDENT:akpm@[47.181.194.90]) by pwold011.asiapac.nortel.com (8.9.3/8.9.3) with ESMTP id UAA05423; Mon, 19 Jun 2000 20:54:05 +1000 Message-ID: <394DFCD5.41FCD467@uow.edu.au> Date: Mon, 19 Jun 2000 20:58:29 +1000 X-Sybari-Space: 00000000 00000000 00000000 From: Andrew Morton X-Mailer: Mozilla 4.7 [en] (X11; I; Linux 2.2.14-15mdk i586) X-Accept-Language: en MIME-Version: 1.0 To: Alan Cox CC: "David S. Miller" , netdev@oss.sgi.com, kuznet@ms2.inr.ac.ru, Andi Kleen Subject: Re: [patch] TCP throughput after 2.2.17-pre1 References: <200006181426.HAA04840@pizda.ninka.net> from "David S. Miller" at Jun 18, 2000 07:26:13 AM <200006181447.KAA07398@devserv.devel.redhat.com> Content-Type: multipart/mixed; boundary="------------9CBB049786F372AA80D19CAF" X-Orig: Sender: owner-netdev@oss.sgi.com Precedence: bulk Return-Path: X-Orcpt: rfc822;netdev-outgoing This is a multi-part message in MIME format. --------------9CBB049786F372AA80D19CAF Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Andi's patch works for me. I've attached the 2.2.17-pre4 version here. Alan Cox wrote: > > and also > the new code path waking on the socket kfreeing a buffer. He didn't appear to do that bit. It just polls. The sleep_on() would be good to have; wait_for_tcp_memory() will be called quite often for the non-oom case. --------------9CBB049786F372AA80D19CAF Content-Type: text/plain; charset=us-ascii; name="tcp.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="tcp.patch" --- linux-2.2.17pre4/include/net/sock.h Tue Aug 10 05:05:13 1999 +++ linux-akpm/include/net/sock.h Mon Jun 19 19:03:07 2000 @@ -717,6 +717,10 @@ extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority); + +extern struct sk_buff *sock_wmalloc_err(struct sock *sk, + unsigned long size, int force, + int priority, int *err); extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority); --- linux-2.2.17pre4/include/net/snmp.h Mon Oct 5 03:19:39 1998 +++ linux-akpm/include/net/snmp.h Mon Jun 19 19:03:07 2000 @@ -178,6 +178,7 @@ unsigned long OfoPruned; unsigned long OutOfWindowIcmps; unsigned long LockDroppedIcmps; + unsigned long SockMallocOOM; }; #endif --- linux-2.2.17pre4/net/core/sock.c Tue May 11 02:55:25 1999 +++ linux-akpm/net/core/sock.c Mon Jun 19 19:03:07 2000 @@ -566,6 +566,31 @@ skb->sk = sk; return skb; } + net_statistics.SockMallocOOM++; + } + return NULL; +} + +/* + * Allocate memory from the sockets send buffer, telling caller about real OOM. + * err is only set for oom, not for socket buffer overflow. + */ +struct sk_buff *sock_wmalloc_err(struct sock *sk, unsigned long size, int force, int priority, int *err) +{ + *err = 0; + /* Note: overcommitment possible */ + if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) { + struct sk_buff * skb; + *err = -ENOMEM; + skb = alloc_skb(size, priority); + if (skb) { + *err = 0; + atomic_add(skb->truesize, &sk->wmem_alloc); + skb->destructor = sock_wfree; + skb->sk = sk; + return skb; + } + net_statistics.SockMallocOOM++; } return NULL; } @@ -583,6 +608,7 @@ skb->sk = sk; return skb; } + net_statistics.SockMallocOOM++; } return NULL; } @@ -602,6 +628,7 @@ if (mem) return mem; atomic_sub(size, &sk->omem_alloc); + net_statistics.SockMallocOOM++; } return NULL; } --- linux-2.2.17pre4/net/ipv4/tcp.c Sun Jun 18 21:04:07 2000 +++ linux-akpm/net/ipv4/tcp.c Mon Jun 19 20:40:33 2000 @@ -697,40 +697,38 @@ } /* - * Wait for more memory for a socket - * - * If we got here an allocation has failed on us. We cannot - * spin here or we may block the very code freeing memory - * for us. + * Wait for more memory for a socket. + * Special case is err == -ENOMEM, in this case just sleep a bit waiting + * for the system to free up some memory. */ -static void wait_for_tcp_memory(struct sock * sk) +static void wait_for_tcp_memory(struct sock * sk, int err) { release_sock(sk); if (!tcp_memory_free(sk)) { struct wait_queue wait = { current, NULL }; + sk->socket->flags &= ~SO_NOSPACE; add_wait_queue(sk->sleep, &wait); for (;;) { if (signal_pending(current)) break; current->state = TASK_INTERRUPTIBLE; - if (tcp_memory_free(sk)) + if (tcp_memory_free(sk) && !err) break; if (sk->shutdown & SEND_SHUTDOWN) break; if (sk->err) break; - schedule(); + if (!err) + schedule(); + else { + schedule_timeout(1); + break; + } } current->state = TASK_RUNNING; remove_wait_queue(sk->sleep, &wait); } - else - { - /* Yield time to the memory freeing paths */ - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } lock_sock(sk); } @@ -924,7 +922,7 @@ tmp += copy; queue_it = 0; } - skb = sock_wmalloc(sk, tmp, 0, GFP_KERNEL); + skb = sock_wmalloc_err(sk, tmp, 0, GFP_KERNEL, &err); /* If we didn't get any memory, we need to sleep. */ if (skb == NULL) { @@ -937,8 +935,10 @@ err = -ERESTARTSYS; goto do_interrupted; } - tcp_push_pending_frames(sk, tp); - wait_for_tcp_memory(sk); + /* In OOM that would fail anyways so do not bother. */ + if (!err) + tcp_push_pending_frames(sk, tp); + wait_for_tcp_memory(sk, err); /* If SACK's were formed or PMTU events happened, * we must find out about it. --- linux-2.2.17pre4/net/ipv4/proc.c Fri Jun 16 23:48:00 2000 +++ linux-akpm/net/ipv4/proc.c Mon Jun 19 19:03:07 2000 @@ -359,8 +359,8 @@ len = sprintf(buffer, "TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed" " EmbryonicRsts PruneCalled RcvPruned OfoPruned" - " OutOfWindowIcmps LockDroppedIcmps\n" - "TcpExt: %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + " OutOfWindowIcmps LockDroppedIcmps SockMallocOOM\n" + "TcpExt: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", net_statistics.SyncookiesSent, net_statistics.SyncookiesRecv, net_statistics.SyncookiesFailed, @@ -369,7 +369,8 @@ net_statistics.RcvPruned, net_statistics.OfoPruned, net_statistics.OutOfWindowIcmps, - net_statistics.LockDroppedIcmps); + net_statistics.LockDroppedIcmps, + net_statistics.SockMallocOOM); if (offset >= len) { --------------9CBB049786F372AA80D19CAF--