net.ipv4.tcp_wmem not work

net.ipv4.tcp_wmem not work

经常发现修改 net.ipv4.tcp_wmem 没有作用: 调整这个参数之后, 一般都看不出效果.
感觉很奇怪, 文档上写得很明白, 但就是不起效果.

今天, 看代码的时候, 突然想到, accept 的 socket 的 sndbuf 是不是继承自 listen socket.
之前发现过这一特点. 新的 socket 有一些是继承自 listen socket 的.

实验

systemtap

# Print the send-buffer size (sk_sndbuf) of the socket passed to tcp_sendmsg,
# but only when the calling process is named "test".
probe kernel.function("tcp_sendmsg")
{
    name = execname();

    # Ignore every process other than the test server.
    if (name != "test")
        next;

    printf("%s sk_sndbuf: %d \n", name, $sk->sk_sndbuf);
}

server code

/**
 *   author       :   丁雪峰
 *   time         :   2016-08-31 09:37:57
 *   email        :   fengidri@yeah.net
 *   version      :   1.0.1
 *   description  :
 */
#include <stdio.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <malloc.h>
#include "sws.h"

/*
 * Test server for observing the send-buffer size of accepted sockets.
 *
 * Listens on 127.0.0.1:9999 and, for every accepted connection, offers one
 * 20 MiB payload to a single write() call, prints how many bytes that call
 * accepted, and closes the connection.
 *
 * Returns -1 if the listening socket or the payload buffer cannot be set up;
 * otherwise it loops forever.
 */
int main()
{
    /* Size of the payload offered to each connection. */
    const int size = 1024 * 1024 * 20;

    int  s = sws_net_server("127.0.0.1", 9999, 0, 1);
    if (s < 0)
    {
        printf("sws_net_server fail\n");
        return -1;
    }

    /*
     * Allocate the payload once and reuse it for every connection: the
     * content is never modified, so a per-connection allocation would only
     * leak 20 MiB per accept. calloc() zero-fills the buffer so that no
     * stale heap content is sent to the peer.
     */
    char *buf = calloc(1, size);
    if (buf == NULL)
    {
        printf("alloc buf fail\n");
        close(s);
        return -1;
    }

    while (1)
    {
        int fd = accept(s, NULL, 0);
        if (fd < 0)
        {
            printf("accept err\n");
            continue;
        }
        /* NOTE(review): presumably switches fd to non-blocking mode, so that
         * write() returns what fits instead of blocking -- confirm in sws. */
        sws_net_noblock(fd, 1);

        /* A single write(): the printed count is what this one call accepted
         * (or -1 on error). */
        int n = write(fd, buf, size);
        printf("write: %d\n", n);
        close(fd);
    }
}

compile:

gcc test.c -L ../sws -I ../sws -lsws -g -o test

sws 是我的一个简单的代码库.

操作

nc 127.0.0.1 9999 > /dev/null -v

反复执行上面的命令, stap 会打印出 tcp_sendmsg 被调用时该 socket 的 sk_sndbuf, server 则会打印出成功写入 socket 的
数据大小. 可以看到两者的大小是相对应的.

但是这个时候, 如果调整了 net.ipv4.tcp_wmem, stap 的输出值并不会变化.

但是重启 test 程序, stap 的输出值就是新的调整的值了.

这就可以证明前面的猜测了.

kernel code

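如下的代码是 SCTP 协议中 accept api 对应的内核代码 (注意: 这是 SCTP 的实现, 不是 TCP 的), 创建新的 socket 的时候, 会调用 sctp_copy_sock.
这个函数里会把 sk_sndbuf, sk_rcvbuf
之类的值都复制到新的 socket.
TCP 的 accept 路径并不经过 sctp_copy_sock, 而是通过 sk_clone_lock 把 listen socket 的整个 struct sock (包括 sk_sndbuf) 克隆给新的 socket, 效果是一样的.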

/* Create and initialize a new sk for the socket returned by accept(). */
static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
					     struct sctp_association *asoc)
{
	struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
			sk->sk_prot);
	struct inet_sock *newinet;

	if (!newsk)
		goto out;

	sock_init_data(NULL, newsk);

	sctp_copy_sock(newsk, sk, asoc);
	sock_reset_flag(newsk, SOCK_ZAPPED);

	newinet = inet_sk(newsk);

	newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;

	sk_refcnt_debug_inc(newsk);

	if (newsk->sk_prot->init(newsk)) {
		sk_common_release(newsk);
		newsk = NULL;
	}

out:
	return newsk;
}

void sctp_copy_sock(struct sock *newsk, struct sock *sk,
		    struct sctp_association *asoc)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_sock *newinet;

	newsk->sk_type = sk->sk_type;
	newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
	newsk->sk_flags = sk->sk_flags;
	newsk->sk_no_check_tx = sk->sk_no_check_tx;
	newsk->sk_no_check_rx = sk->sk_no_check_rx;
	newsk->sk_reuse = sk->sk_reuse;

	newsk->sk_shutdown = sk->sk_shutdown;
	newsk->sk_destruct = sctp_destruct_sock;
	newsk->sk_family = sk->sk_family;
	newsk->sk_protocol = IPPROTO_SCTP;
	newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
	newsk->sk_sndbuf = sk->sk_sndbuf;
	newsk->sk_rcvbuf = sk->sk_rcvbuf;
	newsk->sk_lingertime = sk->sk_lingertime;
	newsk->sk_rcvtimeo = sk->sk_rcvtimeo;
	newsk->sk_sndtimeo = sk->sk_sndtimeo;

	newinet = inet_sk(newsk);

	/* Initialize sk's sport, dport, rcv_saddr and daddr for
	 * getsockname() and getpeername()
	 */
	newinet->inet_sport = inet->inet_sport;
	newinet->inet_saddr = inet->inet_saddr;
	newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
	newinet->inet_dport = htons(asoc->peer.port);
	newinet->pmtudisc = inet->pmtudisc;
	newinet->inet_id = asoc->next_tsn ^ jiffies;

	newinet->uc_ttl = inet->uc_ttl;
	newinet->mc_loop = 1;
	newinet->mc_ttl = 1;
	newinet->mc_index = 0;
	newinet->mc_list = NULL;
}


发表评论

邮箱地址不会被公开。 必填项已用*标注