BACKPORT: net: sk_msg: Simplify sk_psock initialization

Initializing psock->sk_proto and the other saved callbacks is only
done in sk_psock_update_proto, after sk_psock_init has returned.
The logic for this is difficult to follow and needlessly complex.

Instead, initialize psock->sk_proto whenever we allocate a new
psock. Additionally, assert the following invariants:

* The SK has no ULP: ULP does its own finagling of sk->sk_prot
* sk_user_data is unused: we need it to store sk_psock

Protect our access to sk_user_data with sk_callback_lock, which
is what other users such as reuseport arrays do.
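
In condensed form, sk_psock_init() now takes the following shape (a
sketch distilled from the net/core/skmsg.c hunk below; the unrelated
psock setup is elided):

    struct sk_psock *sk_psock_init(struct sock *sk, int node)
    {
        struct sk_psock *psock;

        write_lock_bh(&sk->sk_callback_lock);

        /* Invariant 1: a ULP owns sk->sk_prot, don't fight over it. */
        if (inet_csk_has_ulp(sk)) {
            psock = ERR_PTR(-EINVAL);
            goto out;
        }

        /* Invariant 2: sk_user_data is where the psock pointer lives. */
        if (sk->sk_user_data) {
            psock = ERR_PTR(-EBUSY);
            goto out;
        }

        /* ... allocate psock and save the original callbacks ... */

    out:
        write_unlock_bh(&sk->sk_callback_lock);
        return psock;
    }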

The result is that an sk_psock is always fully initialized, and
that psock->sk_proto is always the "original" struct proto.
The latter allows us to use psock->sk_proto when initializing
IPv6 TCP / UDP callbacks for sockmap.
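
For callers this changes the error contract: sk_psock_init() no longer
returns NULL on failure but an ERR_PTR, so results must be checked with
IS_ERR()/PTR_ERR(), as the net/core/sock_map.c hunks below do. A minimal
sketch of a hypothetical caller (example_attach_psock is illustrative,
not part of the patch):

    static int example_attach_psock(struct sock *sk, int node)
    {
        struct sk_psock *psock = sk_psock_init(sk, node);

        /* Failure is now ERR_PTR(-EINVAL) (ULP present),
         * ERR_PTR(-EBUSY) (sk_user_data in use) or
         * ERR_PTR(-ENOMEM), never NULL.
         */
        if (IS_ERR(psock))
            return PTR_ERR(psock);

        /* psock->sk_proto already holds the original struct proto. */
        return 0;
    }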

Also fix skmsg.c compilation and align include/linux/bpf.h.

Change-Id: If8d7b6a9e3b7341d0dba5705e0f2044234f39fd3
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200821102948.21918-2-lmb@cloudflare.com
Author:     Lorenz Bauer <lmb@cloudflare.com>
AuthorDate: 2020-08-21 11:29:43 +01:00
Committer:  Nolen Johnson
Parent:     d9f1adb52c
Commit:     ff9f2d9cda

 7 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h

@@ -1,8 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
  */
 #ifndef _LINUX_BPF_H
 #define _LINUX_BPF_H 1
@@ -1479,11 +1476,6 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
                            const char *func_name,
                            struct btf_func_model *m);
 
-static inline bool unprivileged_ebpf_enabled(void)
-{
-        return !sysctl_unprivileged_bpf_disabled;
-}
-
 struct bpf_reg_state;
 int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
                              struct bpf_reg_state *regs);
@@ -1496,6 +1488,12 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
 struct bpf_link *bpf_link_by_id(u32 id);
 const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
 
+static inline bool unprivileged_ebpf_enabled(void)
+{
+        return !sysctl_unprivileged_bpf_disabled;
+}
+
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -1702,6 +1700,7 @@ static inline bool unprivileged_ebpf_enabled(void)
 {
         return false;
 }
+
 #endif /* CONFIG_BPF_SYSCALL */
 
 static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h

@@ -360,23 +360,6 @@ static inline void sk_psock_update_proto(struct sock *sk,
                                          struct sk_psock *psock,
                                          struct proto *ops)
 {
-        /* Initialize saved callbacks and original proto only once, since this
-         * function may be called multiple times for a psock, e.g. when
-         * psock->progs.msg_parser is updated.
-         *
-         * Since we've not installed the new proto, psock is not yet in use and
-         * we can initialize it without synchronization.
-         */
-        if (!psock->sk_proto) {
-                struct proto *orig = READ_ONCE(sk->sk_prot);
-
-                psock->saved_unhash = orig->unhash;
-                psock->saved_close = orig->close;
-                psock->saved_write_space = sk->sk_write_space;
-
-                psock->sk_proto = orig;
-        }
-
         /* Pairs with lockless read in sk_clone_lock() */
         WRITE_ONCE(sk->sk_prot, ops);
 }

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h

@@ -321,4 +321,9 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
                                struct sock *sk);
 
 struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
+
+static inline bool inet_csk_has_ulp(struct sock *sk)
+{
+        return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+}
 #endif /* _INET_CONNECTION_SOCK_H */

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c

@@ -482,15 +482,35 @@ end:
 
 struct sk_psock *sk_psock_init(struct sock *sk, int node)
 {
-        struct sk_psock *psock = kzalloc_node(sizeof(*psock),
-                                              GFP_ATOMIC | __GFP_NOWARN,
-                                              node);
-        if (!psock)
-                return NULL;
+        struct sk_psock *psock;
+        struct proto *prot;
+
+        write_lock_bh(&sk->sk_callback_lock);
+
+        if (inet_csk_has_ulp(sk)) {
+                psock = ERR_PTR(-EINVAL);
+                goto out;
+        }
+
+        if (sk->sk_user_data) {
+                psock = ERR_PTR(-EBUSY);
+                goto out;
+        }
 
+        psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node);
+        if (!psock) {
+                psock = ERR_PTR(-ENOMEM);
+                goto out;
+        }
+        prot = READ_ONCE(sk->sk_prot);
         psock->sk = sk;
         psock->eval = __SK_NONE;
+        psock->sk_proto = prot;
+        psock->saved_unhash = prot->unhash;
+        psock->saved_destroy = prot->destroy;
+        psock->saved_close = prot->close;
+        psock->saved_write_space = sk->sk_write_space;
 
         INIT_LIST_HEAD(&psock->link);
         spin_lock_init(&psock->link_lock);
 
@@ -505,6 +525,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
         rcu_assign_sk_user_data_nocopy(sk, psock);
         sock_hold(sk);
 
+out:
+        write_unlock_bh(&sk->sk_callback_lock);
         return psock;
 }
 EXPORT_SYMBOL_GPL(sk_psock_init);
@@ -735,8 +757,6 @@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
 static void sk_psock_verdict_apply(struct sk_psock *psock,
                                    struct sk_buff *skb, int verdict)
 {
-        struct sock *sk_other;
-
         switch (verdict) {
         case __SK_REDIRECT:
                 sk_psock_skb_redirect(skb);
@@ -744,8 +764,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
         case __SK_DROP:
                 /* fall-through */
         default:
-out_free:
                 kfree_skb(skb);
                 break;
         }
 }

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c

@@ -186,8 +186,6 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
 {
         struct proto *prot;
 
-        sock_owned_by_me(sk);
-
         switch (sk->sk_type) {
         case SOCK_STREAM:
                 prot = tcp_bpf_get_proto(sk, psock);
@@ -274,8 +272,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
                 }
         } else {
                 psock = sk_psock_init(sk, map->numa_node);
-                if (!psock) {
-                        ret = -ENOMEM;
+                if (IS_ERR(psock)) {
+                        ret = PTR_ERR(psock);
                         goto out_progs;
                 }
         }
@@ -327,8 +325,8 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
 
         if (!psock) {
                 psock = sk_psock_init(sk, map->numa_node);
-                if (!psock)
-                        return -ENOMEM;
+                if (IS_ERR(psock))
+                        return PTR_ERR(psock);
         }
 
         ret = sock_map_init_proto(sk, psock);

diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c

@@ -556,10 +556,9 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
         prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage;
 }
 
-static void tcp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
+static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops)
 {
-        if (sk->sk_family == AF_INET6 &&
-            unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
+        if (unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
                 spin_lock_bh(&tcpv6_prot_lock);
                 if (likely(ops != tcpv6_prot_saved)) {
                         tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV6], ops);
@@ -592,13 +591,11 @@ struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
         int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
         int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
 
-        if (!psock->sk_proto) {
-                struct proto *ops = READ_ONCE(sk->sk_prot);
-
-                if (tcp_bpf_assert_proto_ops(ops))
+        if (sk->sk_family == AF_INET6) {
+                if (tcp_bpf_assert_proto_ops(psock->sk_proto))
                         return ERR_PTR(-EINVAL);
 
-                tcp_bpf_check_v6_needs_rebuild(sk, ops);
+                tcp_bpf_check_v6_needs_rebuild(psock->sk_proto);
         }
 
         return &tcp_bpf_prots[family][config];

diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c

@@ -22,10 +22,9 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
         prot->close = sock_map_close;
 }
 
-static void udp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
+static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
 {
-        if (sk->sk_family == AF_INET6 &&
-            unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) {
+        if (unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) {
                 spin_lock_bh(&udpv6_prot_lock);
                 if (likely(ops != udpv6_prot_saved)) {
                         udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV6], ops);
@@ -46,8 +45,8 @@ struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
 {
         int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
 
-        if (!psock->sk_proto)
-                udp_bpf_check_v6_needs_rebuild(sk, READ_ONCE(sk->sk_prot));
+        if (sk->sk_family == AF_INET6)
+                udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
 
         return &udp_bpf_prots[family];
 }