Files
Thomas Turner 1dd9f9066b Merge tag 'v4.19.325-cip126' of https://git.kernel.org/pub/scm/linux/kernel/git/cip/linux-cip into android13-4.19-kona
version 4.19.325-cip126

* tag 'v4.19.325-cip126' of https://git.kernel.org/pub/scm/linux/kernel/git/cip/linux-cip:
  CIP: Bump version suffix to -cip126 after merge from cip/linux-4.19.y-st tree
  net: ravb: Ensure memory write completes before ringing TX doorbell
  Update localversion-st, tree is up-to-date with 5.4.301.
  net: ravb: Ensure memory write completes before ringing TX doorbell
  net/ip6_tunnel: Prevent perpetual tunnel growth
  tracing: Fix race condition in kprobe initialization causing NULL pointer dereference
  rtc: interface: Ensure alarm irq is enabled when UIE is enabled
  tpm_tis: Fix incorrect arguments in tpm_tis_probe_irq_single
  media: s5p-mfc: remove an unused/uninitialized variable
  NFSD: Fix last write offset handling in layoutcommit
  NFSD: Minor cleanup in layoutcommit processing
  KEYS: trusted_tpm1: Compare HMAC values in constant time
  NFSD: Define a proc_layoutcommit for the FlexFiles layout type
  vfs: Don't leak disconnected dentries on umount
  ext4: detect invalid INLINE_DATA + EXTENTS flag combination
  drm/amdgpu: use atomic functions with memory barriers for vm fault info
  ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()
  spi: cadence-quadspi: Flush posted register writes before DAC access
  spi: cadence-quadspi: Flush posted register writes before INDAC access
  memory: samsung: exynos-srom: Fix of_iomap leak in exynos_srom_probe
  memory: samsung: exynos-srom: Correct alignment
  arm64: cputype: Add Neoverse-V3AE definitions
  comedi: fix divide-by-zero in comedi_buf_munge()
  binder: remove "invalid inc weak" check
  xhci: dbc: enable back DbC in resume if it was enabled before suspend
  usb/core/quirks: Add Huawei ME906S to wakeup quirk
  USB: serial: option: add Telit FN920C04 ECM compositions
  USB: serial: option: add Quectel RG255C
  USB: serial: option: add UNISOC UIS7720
  net: usb: rtl8150: Fix frame padding
  ocfs2: clear extent cache after moving/defragmenting extents
  MIPS: Malta: Fix keyboard resource preventing i8042 driver from registering
  Revert "cpuidle: menu: Avoid discarding useful information"
  sctp: avoid NULL dereference when chunk data buffer is missing
  arm64, mm: avoid always making PTE dirty in pte_mkwrite()
  net: add ndo_fdb_del_bulk
  net: netlink: add NLM_F_BULK delete request modifier
  net: rtnetlink: use BIT for flag values
  net: rtnetlink: add helper to extract msg type's kind
  net: rtnetlink: add msg kind names
  net: rtnetlink: remove redundant assignment to variable err
  m68k: bitops: Fix find_*_bit() signatures
  hfsplus: return EIO when type of hidden directory mismatch in hfsplus_fill_super()
  hfs: fix KMSAN uninit-value issue in hfs_find_set_zero_bits()
  dlm: check for defined force value in dlm_lockspace_release
  hfsplus: fix KMSAN uninit-value issue in hfsplus_delete_cat()
  hfs: validate record offset in hfsplus_bmap_alloc
  hfsplus: fix KMSAN uninit-value issue in __hfsplus_ext_cache_extent()
  hfs: make proper initalization of struct hfs_find_data
  hfs: clear offset and space out of valid records in b-tree node
  exec: Fix incorrect type for ret
  hfsplus: fix slab-out-of-bounds read in hfsplus_strcasecmp()
  tls: always set record_type in tls_process_cmsg
  tg3: prevent use of uninitialized remote_adv and local_adv variables
  amd-xgbe: Avoid spurious link down messages during interface toggle
  net: dlink: handle dma_map_single() failure properly
  net: dl2k: switch from 'pci_' to 'dma_' API
  xen/events: Update virq_to_irq on migration
  media: lirc: Fix error handling in lirc_register()
  media: rc: Directly use ida_free()
  drm/exynos: exynos7_drm_decon: remove ctx->suspended
  btrfs: avoid potential out-of-bounds in btrfs_encode_fh()
  pwm: berlin: Fix wrong register in suspend/resume
  media: cx18: Add missing check after DMA map
  xen/events: Cleanup find_virq() return codes
  cramfs: Verify inode mode when loading from disk
  pid: Add a judgment for ns null in pid_nr_ns
  minixfs: Verify inode mode when loading from disk
  mfd: intel_soc_pmic_chtdc_ti: Drop unneeded assignment for cache_type
  mfd: intel_soc_pmic_chtdc_ti: Fix invalid regmap-config max_register value
  Squashfs: reject negative file sizes in squashfs_read_inode()
  Squashfs: add additional inode sanity checking
  mfd: vexpress-sysreg: Check the return value of devm_gpiochip_add_data()
  fs: udf: fix OOB read in lengthAllocDescs handling
  KVM: x86: Don't (re)check L1 intercepts when completing userspace I/O
  net/9p: fix double req put in p9_fd_cancelled
  ext4: guard against EA inode refcount underflow in xattr update
  ext4: correctly handle queries for metadata mappings
  ext4: increase i_disksize to offset + len in ext4_update_disksize_before_punch()
  nfsd: nfserr_jukebox in nlm_fopen should lead to a retry
  x86/umip: Fix decoding of register forms of 0F 01 (SGDT and SIDT aliases)
  x86/umip: Check that the instruction opcode is at least two bytes
  PCI/AER: Fix missing uevent on recovery when a reset is requested
  rtc: interface: Fix long-standing race when setting alarm
  mmc: core: SPI mode remove cmd7
  mtd: rawnand: fsmc: Default to autodetect buswidth
  sparc64: fix hugetlb for sun4u
  sctp: Fix MAC comparison to be constant-time
  scsi: hpsa: Fix potential memory leak in hpsa_big_passthru_ioctl()
  parisc: don't reference obsolete termio struct for TC* constants
  lib/genalloc: fix device leak in of_gen_pool_get()
  iio: frequency: adf4350: Fix prescaler usage.
  iio: dac: ad5421: use int type to store negative error codes
  iio: dac: ad5360: use int type to store negative error codes
  crypto: atmel - Fix dma_unmap_sg() direction
  drm/nouveau: fix bad ret code in nouveau_bo_move_prep
  media: i2c: mt9v111: fix incorrect type for ret
  ACPI: debug: fix signedness issues in read/write helpers
  tools build: Align warning options with perf
  net: fsl_pq_mdio: Fix device node reference leak in fsl_pq_mdio_probe
  tcp: Don't call reqsk_fastopen_remove() in tcp_conn_request().
  net/sctp: fix a null dereference in sctp_disposition sctp_sf_do_5_1D_ce()
  net/mlx4: prevent potential use after free in mlx4_en_do_uc_filter()
  scsi: mvsas: Fix use-after-free bugs in mvs_work_queue
  clk: nxp: Fix pll0 rate check condition in LPC18xx CGU driver
  clk: nxp: lpc18xx-cgu: convert from round_rate() to determine_rate()
  perf session: Fix handling when buffer exceeds 2 GiB
  perf util: Fix compression checks returning -1 as bool
  iio: frequency: adf4350: Fix ADF4350_REG3_12BIT_CLKDIV_MODE
  pinctrl: check the return value of pinmux_ops::get_function_name()
  Input: uinput - zero-initialize uinput_ff_upload_compat to avoid info leak
  mm: hugetlb: avoid soft lockup when mprotect to large memory area
  Squashfs: fix uninit-value in squashfs_get_parent
  net: ena: return 0 in ena_get_rxfh_key_size() when RSS hash key is not configurable
  nfp: fix RSS hash key size when RSS is not supported
  drivers/base/node: fix double free in register_one_node()
  ocfs2: fix double free in user_cluster_connect()
  net: usb: Remove disruptive netif_wake_queue in rtl8150_set_multicast
  usb: vhci-hcd: Prevent suspending virtually attached devices
  scsi: mpt3sas: Fix crash in transport port remove by using ioc_info()
  ipvs: Defer ip_vs_ftp unregister during netns cleanup
  NFSv4.1: fix backchannel max_resp_sz verification check
  remoteproc: qcom: q6v5: Avoid disabling handover IRQ twice
  sparc: fix accurate exception reporting in copy_{from,to}_user for M7
  sparc: fix accurate exception reporting in copy_to_user for Niagara 4
  sparc: fix accurate exception reporting in copy_{from_to}_user for Niagara
  sparc: fix accurate exception reporting in copy_{from_to}_user for UltraSPARC III
  sparc: fix accurate exception reporting in copy_{from_to}_user for UltraSPARC
  IB/sa: Fix sa_local_svc_timeout_ms read race
  drivers/base/node: handle error properly in register_one_node()
  watchdog: mpc8xxx_wdt: Reload the watchdog timer when enabling the watchdog
  iio: consumers: Fix offset handling in iio_convert_raw_to_processed()
  ASoC: Intel: bytcr_rt5651: Fix invalid quirk input mapping
  ASoC: Intel: bytcr_rt5640: Fix invalid quirk input mapping
  pps: fix warning in pps_register_cdev when register device fail
  misc: genwqe: Fix incorrect cmd field being reported in error
  usb: gadget: configfs: Correctly set use_os_string at bind
  usb: phy: twl6030: Fix incorrect type for ret
  tcp: fix __tcp_close() to only send RST when required
  PCI: tegra: Fix devm_kcalloc() argument order for port->phys allocation
  wifi: mwifiex: send world regulatory domain to driver
  ALSA: lx_core: use int type to store negative error codes
  media: rj54n1cb0c: Fix memleak in rj54n1_probe()
  scsi: pm80xx: Fix array-index-out-of-of-bounds on rmmod
  usb: host: max3421-hcd: Fix error pointer dereference in probe cleanup
  drm/radeon/r600_cs: clean up of dead code in r600_cs
  i2c: designware: Add disabling clocks when probe fails
  i2c: mediatek: fix potential incorrect use of I2C_MASTER_WRRD
  pwm: tiehrpwm: Fix corner case in clock divisor calculation
  block: use int to store blk_stack_limits() return value
  blk-mq: check kobject state_in_sysfs before deleting in blk_mq_unregister_hctx
  pinctrl: meson-gxl: add missing i2c_d pinmux
  soc: qcom: rpmh-rsc: Unconditionally clear _TRIGGER bit for TCS
  ACPI: processor: idle: Fix memory leak when register cpuidle device failed
  perf: arm_spe: Prevent overflow in PERF_IDX2OFF()
  staging: axis-fifo: fix maximum TX packet length check
  perf subcmd: avoid crash in exclude_cmds when excludes is empty
  dm-integrity: limit MAX_TAG_SIZE to 255
  wifi: rtlwifi: rtl8192cu: Don't claim USB ID 07b8:8188
  USB: serial: option: add SIMCom 8230C compositions
  media: rc: fix races with imon_disconnect()
  media: imon: grab lock earlier in imon_ir_change_protocol()
  media: imon: reorganize serialization
  media: rc: Add support for another iMON 0xffdc device
  media: i2c: tc358743: Fix use-after-free bugs caused by orphan timer in probe
  media: tuner: xc5000: Fix use-after-free in xc5000_release
  media: tunner: xc5000: Refactor firmware load
  udp: Fix memory accounting leak.
  media: b2c2: Fix use-after-free causing by irq_check_work in flexcop_pci_remove
  scsi: target: target_core_configfs: Add length check to avoid buffer overflow

Change-Id: If7e75950e2cad63499e2cfffecac3dc9b432d06c
2025-11-21 22:57:50 +00:00

708 lines
19 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Basic Node interface support
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/vmstat.h>
#include <linux/notifier.h>
#include <linux/node.h>
#include <linux/hugetlb.h>
#include <linux/compaction.h>
#include <linux/cpumask.h>
#include <linux/topology.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/swap.h>
#include <linux/slab.h>
/*
 * Bus type for the "node" subsystem. It is registered by
 * register_node_type() and assigned as the bus of every node device in
 * register_node().
 */
static struct bus_type node_subsys = {
.name = "node",
.dev_name = "node",
};
/*
 * Format the online CPUs that belong to a node into a sysfs buffer.
 *
 * @dev:  node device being read
 * @list: forwarded to cpumap_print_to_pagebuf() to select the output format
 * @buf:  sysfs output buffer
 *
 * Returns the number of bytes written, or 0 when the temporary cpumask
 * cannot be allocated.
 */
static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
{
	struct node *node_dev = to_node(dev);
	cpumask_var_t online_cpus;
	ssize_t len = 0;

	/* buf is PAGE_SIZE long; the output needs 9 chars per 32 bits. */
	BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));

	if (alloc_cpumask_var(&online_cpus, GFP_KERNEL)) {
		/* Only report CPUs of this node that are currently online. */
		cpumask_and(online_cpus, cpumask_of_node(node_dev->dev.id),
			    cpu_online_mask);
		len = cpumap_print_to_pagebuf(list, buf, online_cpus);
		free_cpumask_var(online_cpus);
	}

	return len;
}
static inline ssize_t node_read_cpumask(struct device *dev,
struct device_attribute *attr, char *buf)
{
return node_read_cpumap(dev, false, buf);
}
static inline ssize_t node_read_cpulist(struct device *dev,
struct device_attribute *attr, char *buf)
{
return node_read_cpumap(dev, true, buf);
}
/* Read-only per-node attributes exposing the node's online CPUs. */
static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL);
static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
#define K(x) ((x) << (PAGE_SHIFT - 10))
static ssize_t node_read_meminfo(struct device *dev,
struct device_attribute *attr, char *buf)
{
int n;
int nid = dev->id;
struct pglist_data *pgdat = NODE_DATA(nid);
struct sysinfo i;
unsigned long sreclaimable, sunreclaimable;
si_meminfo_node(&i, nid);
sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE);
n = sysfs_emit(buf,
"Node %d MemTotal: %8lu kB\n"
"Node %d MemFree: %8lu kB\n"
"Node %d MemUsed: %8lu kB\n"
"Node %d Active: %8lu kB\n"
"Node %d Inactive: %8lu kB\n"
"Node %d Active(anon): %8lu kB\n"
"Node %d Inactive(anon): %8lu kB\n"
"Node %d Active(file): %8lu kB\n"
"Node %d Inactive(file): %8lu kB\n"
"Node %d Unevictable: %8lu kB\n"
"Node %d Mlocked: %8lu kB\n",
nid, K(i.totalram),
nid, K(i.freeram),
nid, K(i.totalram - i.freeram),
nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
node_page_state(pgdat, NR_ACTIVE_FILE)),
nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
node_page_state(pgdat, NR_INACTIVE_FILE)),
nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
#ifdef CONFIG_HIGHMEM
n += sprintf(buf + n,
"Node %d HighTotal: %8lu kB\n"
"Node %d HighFree: %8lu kB\n"
"Node %d LowTotal: %8lu kB\n"
"Node %d LowFree: %8lu kB\n",
nid, K(i.totalhigh),
nid, K(i.freehigh),
nid, K(i.totalram - i.totalhigh),
nid, K(i.freeram - i.freehigh));
#endif
n += sprintf(buf + n,
"Node %d Dirty: %8lu kB\n"
"Node %d Writeback: %8lu kB\n"
"Node %d FilePages: %8lu kB\n"
"Node %d Mapped: %8lu kB\n"
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
#ifdef CONFIG_SHADOW_CALL_STACK
"Node %d ShadowCallStack:%8lu kB\n"
#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
"Node %d WritebackTmp: %8lu kB\n"
"Node %d KReclaimable: %8lu kB\n"
"Node %d Slab: %8lu kB\n"
"Node %d SReclaimable: %8lu kB\n"
"Node %d SUnreclaim: %8lu kB\n"
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
"Node %d AnonHugePages: %8lu kB\n"
"Node %d ShmemHugePages: %8lu kB\n"
"Node %d ShmemPmdMapped: %8lu kB\n"
#endif
,
nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
nid, K(node_page_state(pgdat, NR_WRITEBACK)),
nid, K(node_page_state(pgdat, NR_FILE_PAGES)),
nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
#ifdef CONFIG_SHADOW_CALL_STACK
nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
nid, K(sreclaimable +
node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
nid, K(sreclaimable + sunreclaimable),
nid, K(sreclaimable),
nid, K(sunreclaimable)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
,
nid, K(node_page_state(pgdat, NR_ANON_THPS) *
HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
HPAGE_PMD_NR)
#endif
);
n += hugetlb_report_node_meminfo(nid, buf + n);
return n;
}
#undef K
static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
/*
 * sysfs show() for the per-node "numastat" attribute.
 *
 * @dev:  node device; dev->id is the node id
 * @attr: unused
 * @buf:  sysfs output buffer
 *
 * Prints the six NUMA allocation counters summed over the node's zones,
 * one "<name> <value>" pair per line. sysfs_emit() is used, as elsewhere
 * in this file, so the write is bounded by the sysfs buffer.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t node_read_numastat(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf,
			  "numa_hit %lu\n"
			  "numa_miss %lu\n"
			  "numa_foreign %lu\n"
			  "interleave_hit %lu\n"
			  "local_node %lu\n"
			  "other_node %lu\n",
			  sum_zone_numa_state(dev->id, NUMA_HIT),
			  sum_zone_numa_state(dev->id, NUMA_MISS),
			  sum_zone_numa_state(dev->id, NUMA_FOREIGN),
			  sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
			  sum_zone_numa_state(dev->id, NUMA_LOCAL),
			  sum_zone_numa_state(dev->id, NUMA_OTHER));
}
static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
/*
 * sysfs show() for the per-node "vmstat" attribute.
 *
 * @dev:  node device; dev->id is the node id
 * @attr: unused
 * @buf:  sysfs output buffer
 *
 * Prints "<name> <value>" lines for the zone counters, then (with
 * CONFIG_NUMA) the NUMA counters, then the node counters. Names come from
 * vmstat_text[], which is indexed in that same order. The number of lines
 * grows with the stat enums, so each line is written with sysfs_emit_at()
 * to keep the output bounded by the sysfs buffer.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t node_read_vmstat(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	int nid = dev->id;
	struct pglist_data *pgdat = NODE_DATA(nid);
	int i;
	int n = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		n += sysfs_emit_at(buf, n, "%s %lu\n", vmstat_text[i],
				   sum_zone_node_page_state(nid, i));

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		n += sysfs_emit_at(buf, n, "%s %lu\n",
				   vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
				   sum_zone_numa_state(nid, i));
#endif

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		/* Skip hidden vmstat items (those with an empty name). */
		if (*vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
				 NR_VM_NUMA_STAT_ITEMS] == '\0')
			continue;
		n += sysfs_emit_at(buf, n, "%s %lu\n",
				   vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
					       NR_VM_NUMA_STAT_ITEMS],
				   node_page_state(pgdat, i));
	}

	return n;
}
static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
/*
 * sysfs show() for the per-node "distance" attribute.
 *
 * @dev:  node device; dev->id is the node id
 * @attr: unused
 * @buf:  sysfs output buffer
 *
 * Prints node_distance() from this node to every online node on a single
 * line terminated by a newline. Entries are written with sysfs_emit_at()
 * so the output stays bounded by the sysfs buffer.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t node_read_distance(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	int nid = dev->id;
	int len = 0;
	int i;

	/*
	 * buf is currently PAGE_SIZE in length and each node needs 4 chars
	 * at the most (distance + space or newline).
	 */
	BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);

	/* A separating space is emitted before every entry except node 0's. */
	for_each_online_node(i)
		len += sysfs_emit_at(buf, len, "%s%d",
				     i ? " " : "", node_distance(nid, i));

	len += sysfs_emit_at(buf, len, "\n");
	return len;
}
static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL);
/*
 * Default attributes of every node device. ATTRIBUTE_GROUPS() generates
 * node_dev_groups from this table, which register_node() installs as the
 * device's groups.
 */
static struct attribute *node_dev_attrs[] = {
&dev_attr_cpumap.attr,
&dev_attr_cpulist.attr,
&dev_attr_meminfo.attr,
&dev_attr_numastat.attr,
&dev_attr_distance.attr,
&dev_attr_vmstat.attr,
NULL
};
ATTRIBUTE_GROUPS(node_dev);
#ifdef CONFIG_HUGETLBFS
/*
 * hugetlbfs per node attributes registration interface:
 * When/if hugetlb[fs] subsystem initializes [sometime after this module],
 * it will register its per node attributes for all online nodes with
 * memory. It will also call register_hugetlbfs_with_node(), below, to
 * register its attribute registration functions with this node driver.
 * Once these hooks have been initialized, the node driver will call into
 * the hugetlb module to [un]register attributes for hot-plugged nodes.
 */
/* Hooks set by register_hugetlbfs_with_node(); NULL until it is called. */
static node_registration_func_t __hugetlb_register_node;
static node_registration_func_t __hugetlb_unregister_node;
/*
 * Invoke the hugetlb registration hook for @node.
 *
 * Returns true if the hook was called, false when no hook is installed or
 * the node is not in the N_MEMORY state.
 */
static inline bool hugetlb_register_node(struct node *node)
{
	if (!__hugetlb_register_node)
		return false;

	if (!node_state(node->dev.id, N_MEMORY))
		return false;

	__hugetlb_register_node(node);
	return true;
}
/* Invoke the hugetlb unregistration hook for @node, if one is installed. */
static inline void hugetlb_unregister_node(struct node *node)
{
	if (!__hugetlb_unregister_node)
		return;

	__hugetlb_unregister_node(node);
}
/**
 * register_hugetlbfs_with_node - install the hugetlb per-node hooks
 * @doregister: callback used by hugetlb_register_node()
 * @unregister: callback used by hugetlb_unregister_node()
 *
 * Stores both callbacks in the file-local hook pointers.
 */
void register_hugetlbfs_with_node(node_registration_func_t doregister,
				  node_registration_func_t unregister)
{
	__hugetlb_register_node = doregister;
	__hugetlb_unregister_node = unregister;
}
#else
static inline void hugetlb_register_node(struct node *node) {}
static inline void hugetlb_unregister_node(struct node *node) {}
#endif
/*
 * Release callback of a node device: frees the struct node that embeds
 * @dev.
 */
static void node_device_release(struct device *dev)
{
	struct node *released = to_node(dev);

#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
	/*
	 * node_work is only scheduled when a memory section of this node
	 * goes online or offline. By the time the device is released all
	 * of the node's memory is offline, so nothing new can be queued.
	 *
	 * The work item lives inside the structure we are about to free,
	 * so wait for any pending run to finish first.
	 */
	flush_work(&released->node_work);
#endif
	kfree(released);
}
/*
* register_node - Setup a sysfs device for a node.
* @num - Node number to use when creating the device.
*
* Initialize and register the node device.
*/
static int register_node(struct node *node, int num)
{
int error;
node->dev.id = num;
node->dev.bus = &node_subsys;
node->dev.release = node_device_release;
node->dev.groups = node_dev_groups;
error = device_register(&node->dev);
if (error)
put_device(&node->dev);
else {
hugetlb_register_node(node);
compaction_register_node(node);
}
return error;
}
/**
 * unregister_node - tear down a node device
 * @node: node that is going away
 *
 * Removes the compaction and hugetlb per-node attributes and then
 * unregisters the device itself. Every device on the node must already
 * have been unregistered by the caller.
 */
void unregister_node(struct node *node)
{
	compaction_unregister_node(node);
	/* Does nothing for a memoryless node. */
	hugetlb_unregister_node(node);
	device_unregister(&node->dev);
}
/*
 * Node devices indexed by node id. An entry is allocated in
 * __register_one_node() and reset to NULL in unregister_one_node().
 */
struct node *node_devices[MAX_NUMNODES];
/*
 * Create the pair of sysfs links between a CPU device and its node device:
 * one in the node's directory pointing at the CPU, and one in the CPU's
 * directory pointing back at the node.
 *
 * Returns 0 when the node is offline or the CPU has no device, otherwise
 * the result of the sysfs link creation.
 */
int register_cpu_under_node(unsigned int cpu, unsigned int nid)
{
	struct device *cpu_dev;
	struct kobject *node_kobj;
	int err;

	if (!node_online(nid))
		return 0;

	cpu_dev = get_cpu_device(cpu);
	if (!cpu_dev)
		return 0;

	node_kobj = &node_devices[nid]->dev.kobj;

	err = sysfs_create_link(node_kobj, &cpu_dev->kobj,
				kobject_name(&cpu_dev->kobj));
	if (err)
		return err;

	return sysfs_create_link(&cpu_dev->kobj, node_kobj,
				 kobject_name(node_kobj));
}
/*
 * Remove the two sysfs links created by register_cpu_under_node().
 * Always returns 0; nothing is done when the node is offline or the CPU
 * has no device.
 */
int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
{
	struct device *cpu_dev;
	struct kobject *node_kobj;

	if (!node_online(nid))
		return 0;

	cpu_dev = get_cpu_device(cpu);
	if (!cpu_dev)
		return 0;

	node_kobj = &node_devices[nid]->dev.kobj;

	sysfs_remove_link(node_kobj, kobject_name(&cpu_dev->kobj));
	sysfs_remove_link(&cpu_dev->kobj, kobject_name(node_kobj));
	return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
/*
 * Look up the node id of a page frame.
 *
 * Returns -1 when pfn_valid_within() rejects @pfn. With
 * CONFIG_DEFERRED_STRUCT_PAGE_INIT, early_pfn_to_nid() is used until the
 * system reaches SYSTEM_RUNNING; otherwise pfn_to_nid() is used.
 */
static int __ref get_nid_for_pfn(unsigned long pfn)
{
	if (pfn_valid_within(pfn)) {
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
		if (system_state < SYSTEM_RUNNING)
			return early_pfn_to_nid(pfn);
#endif
		return pfn_to_nid(pfn);
	}

	return -1;
}
/*
 * Record @nid in @mem_blk and create the sysfs links between the memory
 * block and the node, in both directions. Link creation uses the _nowarn
 * variants.
 *
 * Returns 0 on success or the first link-creation error.
 */
static int do_register_memory_block_under_node(int nid,
					       struct memory_block *mem_blk)
{
	struct kobject *node_kobj = &node_devices[nid]->dev.kobj;
	struct kobject *blk_kobj = &mem_blk->dev.kobj;
	int err;

	/*
	 * A block spanning several nodes only remembers the node that was
	 * processed last.
	 */
	mem_blk->nid = nid;

	err = sysfs_create_link_nowarn(node_kobj, blk_kobj,
				       kobject_name(blk_kobj));
	if (err)
		return err;

	return sysfs_create_link_nowarn(blk_kobj, node_kobj,
					kobject_name(node_kobj));
}
/*
 * Boot-time variant: link @mem_blk under the node given by @arg, but only
 * if at least one present page of the block belongs to that node.
 *
 * @mem_blk: memory block to examine
 * @arg:     pointer to the int node id
 *
 * Returns 0 when the block does not span the node, otherwise the result of
 * do_register_memory_block_under_node().
 */
int register_mem_block_under_node_early(struct memory_block *mem_blk, void *arg)
{
	const int nid = *(int *)arg;
	unsigned long first_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
	unsigned long last_pfn = section_nr_to_pfn(mem_blk->end_section_nr) +
				 PAGES_PER_SECTION - 1;
	unsigned long pfn;

	for (pfn = first_pfn; pfn <= last_pfn; pfn++) {
		int page_nid;

		/*
		 * The block may contain absent sections; jump to the last
		 * pfn of such a section so the loop increment moves on to
		 * the next one.
		 */
		if (!pfn_present(pfn)) {
			pfn = round_down(pfn + PAGES_PER_SECTION,
					 PAGES_PER_SECTION) - 1;
			continue;
		}

		/*
		 * Node ranges can be interleaved at boot, so each page's
		 * owner has to be checked individually.
		 */
		page_nid = get_nid_for_pfn(pfn);
		if (page_nid >= 0 && page_nid == nid)
			return do_register_memory_block_under_node(nid, mem_blk);
	}

	/* No page of this block belongs to the requested node. */
	return 0;
}
/*
 * Hotplug variant: every page of the block is known to belong to one node,
 * so the block is linked under the node in @arg without inspecting pfns.
 */
static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
						 void *arg)
{
	const int *nid_ptr = arg;

	return do_register_memory_block_under_node(*nid_ptr, mem_blk);
}
/*
 * Remove the sysfs links between a memory block and the node recorded in
 * mem_blk->nid. Blocks spanning multiple nodes cannot be offlined and are
 * therefore never removed. Nothing is done when no node is recorded.
 */
void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
	struct kobject *node_kobj;
	struct kobject *blk_kobj = &mem_blk->dev.kobj;

	if (mem_blk->nid == NUMA_NO_NODE)
		return;

	node_kobj = &node_devices[mem_blk->nid]->dev.kobj;

	sysfs_remove_link(node_kobj, kobject_name(blk_kobj));
	sysfs_remove_link(blk_kobj, kobject_name(node_kobj));
}
/*
 * Link every memory block in [start_pfn, end_pfn) handed out by
 * walk_memory_range() under node @nid.
 *
 * @context selects the per-block callback: the hotplug variant for
 * MEMINIT_HOTPLUG, the boot-time variant for anything else.
 *
 * Returns the result of walk_memory_range().
 */
int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
		      enum meminit_context context)
{
	walk_memory_blocks_func_t func = (context == MEMINIT_HOTPLUG) ?
		register_mem_block_under_node_hotplug :
		register_mem_block_under_node_early;

	return walk_memory_range(start_pfn, end_pfn, (void *)&nid, func);
}
#ifdef CONFIG_HUGETLBFS
/*
 * Work handler that [un]registers the per-node hstate attributes when a
 * node transitions to or from the memoryless state.
 */
static void node_hugetlb_work(struct work_struct *work)
{
	struct node *node = container_of(work, struct node, node_work);

	/*
	 * This only runs after such a transition. Which direction it went
	 * is visible from whether the node has memory now, and
	 * hugetlb_register_node() already checks that: if it registers the
	 * attributes we are done, otherwise the node became memoryless and
	 * the attributes are removed.
	 */
	if (hugetlb_register_node(node))
		return;

	hugetlb_unregister_node(node);
}
static void init_node_hugetlb_work(int nid)
{
INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
}
static int node_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct memory_notify *mnb = arg;
int nid = mnb->status_change_nid;
switch (action) {
case MEM_ONLINE:
case MEM_OFFLINE:
/*
* offload per node hstate [un]registration to a work thread
* when transitioning to/from memoryless state.
*/
if (nid != NUMA_NO_NODE)
schedule_work(&node_devices[nid]->node_work);
break;
case MEM_GOING_ONLINE:
case MEM_GOING_OFFLINE:
case MEM_CANCEL_ONLINE:
case MEM_CANCEL_OFFLINE:
default:
break;
}
return NOTIFY_OK;
}
#endif /* CONFIG_HUGETLBFS */
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
/*
 * Fallbacks used when either CONFIG_MEMORY_HOTPLUG_SPARSE or
 * CONFIG_HUGETLBFS is disabled: the notifier accepts every event without
 * acting on it and there is no work item to initialise.
 */
#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
!defined(CONFIG_HUGETLBFS)
static inline int node_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
return NOTIFY_OK;
}
static void init_node_hugetlb_work(int nid) { }
#endif
/*
 * Allocate and register the device for node @nid and link the present
 * CPUs that belong to it.
 *
 * Returns 0 on success, -ENOMEM if the node structure cannot be allocated,
 * or the error from register_node(). On failure node_devices[nid] is left
 * NULL.
 */
int __register_one_node(int nid)
{
	int error;
	int cpu;

	node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
	if (!node_devices[nid])
		return -ENOMEM;

	/*
	 * Initialise the memory-hotplug work item before the device is
	 * registered. If register_node() fails it drops the device
	 * reference, and node_device_release() may then flush node_work;
	 * flushing a work item that was never set up with INIT_WORK() must
	 * be avoided.
	 */
	init_node_hugetlb_work(nid);

	error = register_node(node_devices[nid], nid);
	if (error) {
		/*
		 * register_node() already called put_device(), so the node
		 * must not be freed again here; only forget the pointer.
		 */
		node_devices[nid] = NULL;
		return error;
	}

	/* link cpu under this node */
	for_each_present_cpu(cpu) {
		if (cpu_to_node(cpu) == nid)
			register_cpu_under_node(cpu, nid);
	}

	return 0;
}
/*
 * Unregister the device of node @nid, if one is registered, and clear its
 * slot in node_devices[].
 */
void unregister_one_node(int nid)
{
	struct node *node = node_devices[nid];

	if (node) {
		unregister_node(node);
		node_devices[nid] = NULL;
	}
}
/*
 * node states attributes
 */
/*
 * Print the nodemask for @state into @buf using the "%*pbl" format,
 * followed by a newline. One byte is held back from scnprintf() so that
 * the newline and the terminating NUL always fit.
 *
 * Returns the number of bytes written, including the newline.
 */
static ssize_t print_nodes_state(enum node_states state, char *buf)
{
	int len = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
			    nodemask_pr_args(&node_states[state]));

	buf[len] = '\n';
	buf[len + 1] = '\0';
	return len + 1;
}
/*
 * A device attribute paired with the node state it reports;
 * show_node_state() recovers the wrapper from the embedded attribute.
 */
struct node_attr {
struct device_attribute attr;
enum node_states state;
};
static ssize_t show_node_state(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct node_attr *na = container_of(attr, struct node_attr, attr);
return print_nodes_state(na->state, buf);
}
/* Build a read-only (0444) node_attr served by show_node_state(). */
#define _NODE_ATTR(name, state) \
{ __ATTR(name, 0444, show_node_state, NULL), state }
/*
 * One attribute per node state, indexed by the state value.
 * register_node_type() checks at build time that the table size equals
 * NR_NODE_STATES.
 */
static struct node_attr node_state_attr[] = {
[N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
[N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
[N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
#ifdef CONFIG_HIGHMEM
[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
#endif
[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
};
/*
 * NULL-terminated list of the node state attributes, in the form expected
 * by struct attribute_group.
 */
static struct attribute *node_state_attrs[] = {
&node_state_attr[N_POSSIBLE].attr.attr,
&node_state_attr[N_ONLINE].attr.attr,
&node_state_attr[N_NORMAL_MEMORY].attr.attr,
#ifdef CONFIG_HIGHMEM
&node_state_attr[N_HIGH_MEMORY].attr.attr,
#endif
&node_state_attr[N_MEMORY].attr.attr,
&node_state_attr[N_CPU].attr.attr,
NULL
};
/* Group holding the node state attributes. */
static struct attribute_group memory_root_attr_group = {
.attrs = node_state_attrs,
};
/* Groups passed to subsys_system_register() for the node subsystem. */
static const struct attribute_group *cpu_root_attr_groups[] = {
&memory_root_attr_group,
NULL,
};
#define NODE_CALLBACK_PRI 2 /* lower than SLAB */
/*
 * Register the node subsystem together with its state attributes and, on
 * success, hook node_memory_callback() into the memory hotplug notifier
 * chain.
 *
 * Returns 0 on success or the error from subsys_system_register().
 */
static int __init register_node_type(void)
{
	static struct notifier_block node_memory_callback_nb = {
		.notifier_call = node_memory_callback,
		.priority = NODE_CALLBACK_PRI,
	};
	int ret;

	/* Both attribute tables must cover every node state. */
	BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
	BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);

	ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
	if (ret)
		return ret;

	register_hotmemory_notifier(&node_memory_callback_nb);
	return 0;
}
postcore_initcall(register_node_type);