mirror of
https://github.com/LineageOS/android_kernel_fxtec_sm6115.git
synced 2026-05-26 10:23:44 +00:00
[ Upstream commit 7c770dadfda5cbbde6aa3c4363ed513f1d212bf8 ] The patch cited in the Fixes tag below changed the teardown code for OVS ports to no longer unconditionally take the RTNL. After this change, the netdev_destroy() callback can proceed immediately to the call_rcu() invocation if the IFF_OVS_DATAPATH flag is already cleared on the netdev. The ovs_netdev_detach_dev() function clears the flag before completing the unregistration, and if it gets preempted after clearing the flag (as can happen on an -rt kernel), netdev_destroy() can complete and the device can be freed before the unregistration completes. This leads to a splat like: [ 998.393867] Oops: general protection fault, probably for non-canonical address 0xff00000001000239: 0000 [#1] SMP PTI [ 998.393877] CPU: 42 UID: 0 PID: 55177 Comm: ip Kdump: loaded Not tainted 6.12.0-211.1.1.el10_2.x86_64+rt #1 PREEMPT_RT [ 998.393886] Hardware name: Dell Inc. PowerEdge R740/0JMK61, BIOS 2.24.0 03/27/2025 [ 998.393889] RIP: 0010:dev_set_promiscuity+0x8d/0xa0 [ 998.393901] Code: 00 00 75 d8 48 8b 53 08 48 83 ba b0 02 00 00 00 75 ca 48 83 c4 08 5b c3 cc cc cc cc 48 83 bf 48 09 00 00 00 75 91 48 8b 47 08 <48> 83 b8 b0 02 00 00 00 74 97 eb 81 0f 1f 80 00 00 00 00 90 90 90 [ 998.393906] RSP: 0018:ffffce5864a5f6a0 EFLAGS: 00010246 [ 998.393912] RAX: ff00000000ffff89 RBX: ffff894d0adf5a05 RCX: 0000000000000000 [ 998.393917] RDX: 0000000000000000 RSI: 00000000ffffffff RDI: ffff894d0adf5a05 [ 998.393921] RBP: ffff894d19252000 R08: ffff894d19252000 R09: 0000000000000000 [ 998.393924] R10: ffff894d19252000 R11: ffff894d192521b8 R12: 0000000000000006 [ 998.393927] R13: ffffce5864a5f738 R14: 00000000ffffffe2 R15: 0000000000000000 [ 998.393931] FS: 00007fad61971800(0000) GS:ffff894cc0140000(0000) knlGS:0000000000000000 [ 998.393936] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 998.393940] CR2: 000055df0a2a6e40 CR3: 000000011c7fe003 CR4: 00000000007726f0 [ 998.393944] PKRU: 55555554 [ 998.393946] Call Trace: [ 998.393949] 
<TASK> [ 998.393952] ? show_trace_log_lvl+0x1b0/0x2f0 [ 998.393961] ? show_trace_log_lvl+0x1b0/0x2f0 [ 998.393975] ? dp_device_event+0x41/0x80 [openvswitch] [ 998.394009] ? __die_body.cold+0x8/0x12 [ 998.394016] ? die_addr+0x3c/0x60 [ 998.394027] ? exc_general_protection+0x16d/0x390 [ 998.394042] ? asm_exc_general_protection+0x26/0x30 [ 998.394058] ? dev_set_promiscuity+0x8d/0xa0 [ 998.394066] ? ovs_netdev_detach_dev+0x3a/0x80 [openvswitch] [ 998.394092] dp_device_event+0x41/0x80 [openvswitch] [ 998.394102] notifier_call_chain+0x5a/0xd0 [ 998.394106] unregister_netdevice_many_notify+0x51b/0xa60 [ 998.394110] rtnl_dellink+0x169/0x3e0 [ 998.394121] ? rt_mutex_slowlock.constprop.0+0x95/0xd0 [ 998.394125] rtnetlink_rcv_msg+0x142/0x3f0 [ 998.394128] ? avc_has_perm_noaudit+0x69/0xf0 [ 998.394130] ? __pfx_rtnetlink_rcv_msg+0x10/0x10 [ 998.394132] netlink_rcv_skb+0x50/0x100 [ 998.394138] netlink_unicast+0x292/0x3f0 [ 998.394141] netlink_sendmsg+0x21b/0x470 [ 998.394145] ____sys_sendmsg+0x39d/0x3d0 [ 998.394149] ___sys_sendmsg+0x9a/0xe0 [ 998.394156] __sys_sendmsg+0x7a/0xd0 [ 998.394160] do_syscall_64+0x7f/0x170 [ 998.394162] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 998.394165] RIP: 0033:0x7fad61bf4724 [ 998.394188] Code: 89 02 b8 ff ff ff ff eb bb 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 80 3d c5 e9 0c 00 00 74 13 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 89 54 24 1c 48 89 [ 998.394189] RSP: 002b:00007ffd7e2f7cb8 EFLAGS: 00000202 ORIG_RAX: 000000000000002e [ 998.394191] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fad61bf4724 [ 998.394193] RDX: 0000000000000000 RSI: 00007ffd7e2f7d20 RDI: 0000000000000003 [ 998.394194] RBP: 00007ffd7e2f7d90 R08: 0000000000000010 R09: 000000000000003f [ 998.394195] R10: 000055df11558010 R11: 0000000000000202 R12: 00007ffd7e2f8380 [ 998.394196] R13: 0000000069b233d7 R14: 000055df0a256040 R15: 0000000000000000 [ 998.394200] </TASK> To fix this, reorder the operations in ovs_netdev_detach_dev() 
to only clear the flag after completing the other operations, and introduce an smp_wmb() to make the ordering requirement explicit. The smp_wmb() is paired with a full smp_mb() in netdev_destroy() to make sure the call_rcu() invocation does not happen before the unregister operations are visible. Reported-by: Minxi Hou <mhou@redhat.com> Tested-by: Minxi Hou <mhou@redhat.com> Fixes: 549822767630 ("net: openvswitch: Avoid needlessly taking the RTNL on vport destroy") Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://patch.msgid.link/20260318155554.1133405-1-toke@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Ulrich Hecht <uli@kernel.org>
233 lines
5.5 KiB
C
233 lines
5.5 KiB
C
/*
|
|
* Copyright (c) 2007-2012 Nicira, Inc.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
* License as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
* 02110-1301, USA
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/if_arp.h>
|
|
#include <linux/if_bridge.h>
|
|
#include <linux/if_vlan.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/llc.h>
|
|
#include <linux/rtnetlink.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/openvswitch.h>
|
|
#include <linux/export.h>
|
|
|
|
#include <net/ip_tunnels.h>
|
|
#include <net/rtnetlink.h>
|
|
|
|
#include "datapath.h"
|
|
#include "vport.h"
|
|
#include "vport-internal_dev.h"
|
|
#include "vport-netdev.h"
|
|
|
|
/* Forward declaration: netdev_create() takes the address of this ops table,
 * whose initializer appears further down in this file.
 */
static struct vport_ops ovs_netdev_vport_ops;
|
|
|
|
/* Must be called with rcu_read_lock. */
|
|
static void netdev_port_receive(struct sk_buff *skb)
|
|
{
|
|
struct vport *vport;
|
|
|
|
vport = ovs_netdev_get_vport(skb->dev);
|
|
if (unlikely(!vport))
|
|
goto error;
|
|
|
|
if (unlikely(skb_warn_if_lro(skb)))
|
|
goto error;
|
|
|
|
/* Make our own copy of the packet. Otherwise we will mangle the
|
|
* packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
|
|
*/
|
|
skb = skb_share_check(skb, GFP_ATOMIC);
|
|
if (unlikely(!skb))
|
|
return;
|
|
|
|
if (skb->dev->type == ARPHRD_ETHER) {
|
|
skb_push(skb, ETH_HLEN);
|
|
skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
|
|
}
|
|
ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
|
|
return;
|
|
error:
|
|
kfree_skb(skb);
|
|
}
|
|
|
|
/* Called with rcu_read_lock and bottom-halves disabled. */
|
|
static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
|
|
{
|
|
struct sk_buff *skb = *pskb;
|
|
|
|
if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
|
|
return RX_HANDLER_PASS;
|
|
|
|
netdev_port_receive(skb);
|
|
return RX_HANDLER_CONSUMED;
|
|
}
|
|
|
|
static struct net_device *get_dpdev(const struct datapath *dp)
|
|
{
|
|
struct vport *local;
|
|
|
|
local = ovs_vport_ovsl(dp, OVSP_LOCAL);
|
|
BUG_ON(!local);
|
|
return local->dev;
|
|
}
|
|
|
|
struct vport *ovs_netdev_link(struct vport *vport, const char *name)
|
|
{
|
|
int err;
|
|
|
|
vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name);
|
|
if (!vport->dev) {
|
|
err = -ENODEV;
|
|
goto error_free_vport;
|
|
}
|
|
|
|
if (vport->dev->flags & IFF_LOOPBACK ||
|
|
(vport->dev->type != ARPHRD_ETHER &&
|
|
vport->dev->type != ARPHRD_NONE) ||
|
|
ovs_is_internal_dev(vport->dev)) {
|
|
err = -EINVAL;
|
|
goto error_put;
|
|
}
|
|
|
|
rtnl_lock();
|
|
err = netdev_master_upper_dev_link(vport->dev,
|
|
get_dpdev(vport->dp),
|
|
NULL, NULL, NULL);
|
|
if (err)
|
|
goto error_unlock;
|
|
|
|
err = netdev_rx_handler_register(vport->dev, netdev_frame_hook,
|
|
vport);
|
|
if (err)
|
|
goto error_master_upper_dev_unlink;
|
|
|
|
dev_disable_lro(vport->dev);
|
|
dev_set_promiscuity(vport->dev, 1);
|
|
vport->dev->priv_flags |= IFF_OVS_DATAPATH;
|
|
rtnl_unlock();
|
|
|
|
return vport;
|
|
|
|
error_master_upper_dev_unlink:
|
|
netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp));
|
|
error_unlock:
|
|
rtnl_unlock();
|
|
error_put:
|
|
dev_put(vport->dev);
|
|
error_free_vport:
|
|
ovs_vport_free(vport);
|
|
return ERR_PTR(err);
|
|
}
|
|
EXPORT_SYMBOL_GPL(ovs_netdev_link);
|
|
|
|
static struct vport *netdev_create(const struct vport_parms *parms)
|
|
{
|
|
struct vport *vport;
|
|
|
|
vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms);
|
|
if (IS_ERR(vport))
|
|
return vport;
|
|
|
|
return ovs_netdev_link(vport, parms->name);
|
|
}
|
|
|
|
static void vport_netdev_free(struct rcu_head *rcu)
|
|
{
|
|
struct vport *vport = container_of(rcu, struct vport, rcu);
|
|
|
|
if (vport->dev)
|
|
dev_put(vport->dev);
|
|
ovs_vport_free(vport);
|
|
}
|
|
|
|
void ovs_netdev_detach_dev(struct vport *vport)
|
|
{
|
|
ASSERT_RTNL();
|
|
netdev_rx_handler_unregister(vport->dev);
|
|
netdev_upper_dev_unlink(vport->dev,
|
|
netdev_master_upper_dev_get(vport->dev));
|
|
dev_set_promiscuity(vport->dev, -1);
|
|
|
|
/* paired with smp_mb() in netdev_destroy() */
|
|
smp_wmb();
|
|
|
|
vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
|
|
}
|
|
|
|
static void netdev_destroy(struct vport *vport)
|
|
{
|
|
rtnl_lock();
|
|
if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
|
|
ovs_netdev_detach_dev(vport);
|
|
rtnl_unlock();
|
|
|
|
/* paired with smp_wmb() in ovs_netdev_detach_dev() */
|
|
smp_mb();
|
|
|
|
call_rcu(&vport->rcu, vport_netdev_free);
|
|
}
|
|
|
|
void ovs_netdev_tunnel_destroy(struct vport *vport)
|
|
{
|
|
rtnl_lock();
|
|
if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
|
|
ovs_netdev_detach_dev(vport);
|
|
|
|
/* We can be invoked by both explicit vport deletion and
|
|
* underlying netdev deregistration; delete the link only
|
|
* if it's not already shutting down.
|
|
*/
|
|
if (vport->dev->reg_state == NETREG_REGISTERED)
|
|
rtnl_delete_link(vport->dev);
|
|
dev_put(vport->dev);
|
|
vport->dev = NULL;
|
|
rtnl_unlock();
|
|
|
|
call_rcu(&vport->rcu, vport_netdev_free);
|
|
}
|
|
EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
|
|
|
|
/* Returns null if this device is not attached to a datapath. */
|
|
struct vport *ovs_netdev_get_vport(struct net_device *dev)
|
|
{
|
|
if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
|
|
return (struct vport *)
|
|
rcu_dereference_rtnl(dev->rx_handler_data);
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
static struct vport_ops ovs_netdev_vport_ops = {
|
|
.type = OVS_VPORT_TYPE_NETDEV,
|
|
.create = netdev_create,
|
|
.destroy = netdev_destroy,
|
|
.send = dev_queue_xmit,
|
|
};
|
|
|
|
/* Register the netdev vport operations.  Returns whatever
 * ovs_vport_ops_register() returns.
 */
int __init ovs_netdev_init(void)
{
	int ret = ovs_vport_ops_register(&ovs_netdev_vport_ops);

	return ret;
}
|
|
|
|
void ovs_netdev_exit(void)
|
|
{
|
|
ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
|
|
}
|