Skip to content

Commit 0f7908a

Browse files
jhovoldgregkh
authored andcommitted
PCI/ASPM: Fix deadlock when enabling ASPM
commit 1e56086 upstream. A last minute revert in 6.7-final introduced a potential deadlock when enabling ASPM during probe of Qualcomm PCIe controllers as reported by lockdep: ============================================ WARNING: possible recursive locking detected 6.7.0 #40 Not tainted -------------------------------------------- kworker/u16:5/90 is trying to acquire lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc but task is already holding lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pci_bus_sem); lock(pci_bus_sem); *** DEADLOCK *** Call trace: print_deadlock_bug+0x25c/0x348 __lock_acquire+0x10a4/0x2064 lock_acquire+0x1e8/0x318 down_read+0x60/0x184 pcie_aspm_pm_state_change+0x58/0xdc pci_set_full_power_state+0xa8/0x114 pci_set_power_state+0xc4/0x120 qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom] pci_walk_bus+0x64/0xbc qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom] The deadlock can easily be reproduced on machines like the Lenovo ThinkPad X13s by adding a delay to increase the race window during asynchronous probe where another thread can take a write lock. Add a new pci_set_power_state_locked() and associated helper functions that can be called with the PCI bus semaphore held to avoid taking the read lock twice. Link: https://lore.kernel.org/r/[email protected] Link: https://lore.kernel.org/r/[email protected] Fixes: f93e71a ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"") Signed-off-by: Johan Hovold <[email protected]> Signed-off-by: Bjorn Helgaas <[email protected]> Cc: <[email protected]> # 6.7 [bhelgaas: backported to v6.1.y, which contains b9c370b ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()""), a backport of f93e71a. This omits the drivers/pci/controller/dwc/pcie-qcom.c hunk that updates qcom_pcie_enable_aspm(), which was added by 9f4f3df ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops"), which is not present in v6.1.87.] Signed-off-by: Bjorn Helgaas <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 67a8771 commit 0f7908a

File tree

5 files changed

+100
-49
lines changed

5 files changed

+100
-49
lines changed

drivers/pci/bus.c

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -379,29 +379,17 @@ void pci_bus_add_devices(const struct pci_bus *bus)
379379
}
380380
EXPORT_SYMBOL(pci_bus_add_devices);
381381

382-
/** pci_walk_bus - walk devices on/under bus, calling callback.
383-
* @top bus whose devices should be walked
384-
* @cb callback to be called for each device found
385-
* @userdata arbitrary pointer to be passed to callback.
386-
*
387-
* Walk the given bus, including any bridged devices
388-
* on buses under this bus. Call the provided callback
389-
* on each device found.
390-
*
391-
* We check the return of @cb each time. If it returns anything
392-
* other than 0, we break out.
393-
*
394-
*/
395-
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
396-
void *userdata)
382+
static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
383+
void *userdata, bool locked)
397384
{
398385
struct pci_dev *dev;
399386
struct pci_bus *bus;
400387
struct list_head *next;
401388
int retval;
402389

403390
bus = top;
404-
down_read(&pci_bus_sem);
391+
if (!locked)
392+
down_read(&pci_bus_sem);
405393
next = top->devices.next;
406394
for (;;) {
407395
if (next == &bus->devices) {
@@ -424,10 +412,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
424412
if (retval)
425413
break;
426414
}
427-
up_read(&pci_bus_sem);
415+
if (!locked)
416+
up_read(&pci_bus_sem);
417+
}
418+
419+
/**
420+
* pci_walk_bus - walk devices on/under bus, calling callback.
421+
* @top: bus whose devices should be walked
422+
* @cb: callback to be called for each device found
423+
* @userdata: arbitrary pointer to be passed to callback
424+
*
425+
* Walk the given bus, including any bridged devices
426+
* on buses under this bus. Call the provided callback
427+
* on each device found.
428+
*
429+
* We check the return of @cb each time. If it returns anything
430+
* other than 0, we break out.
431+
*/
432+
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
433+
{
434+
__pci_walk_bus(top, cb, userdata, false);
428435
}
429436
EXPORT_SYMBOL_GPL(pci_walk_bus);
430437

438+
void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
439+
{
440+
lockdep_assert_held(&pci_bus_sem);
441+
442+
__pci_walk_bus(top, cb, userdata, true);
443+
}
444+
EXPORT_SYMBOL_GPL(pci_walk_bus_locked);
445+
431446
struct pci_bus *pci_bus_get(struct pci_bus *bus)
432447
{
433448
if (bus)

drivers/pci/pci.c

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,6 +1258,7 @@ int pci_power_up(struct pci_dev *dev)
12581258
/**
12591259
* pci_set_full_power_state - Put a PCI device into D0 and update its state
12601260
* @dev: PCI device to power up
1261+
* @locked: whether pci_bus_sem is held
12611262
*
12621263
* Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register
12631264
* to confirm the state change, restore its BARs if they might be lost and
@@ -1267,7 +1268,7 @@ int pci_power_up(struct pci_dev *dev)
12671268
* to D0, it is more efficient to use pci_power_up() directly instead of this
12681269
* function.
12691270
*/
1270-
static int pci_set_full_power_state(struct pci_dev *dev)
1271+
static int pci_set_full_power_state(struct pci_dev *dev, bool locked)
12711272
{
12721273
u16 pmcsr;
12731274
int ret;
@@ -1303,7 +1304,7 @@ static int pci_set_full_power_state(struct pci_dev *dev)
13031304
}
13041305

13051306
if (dev->bus->self)
1306-
pcie_aspm_pm_state_change(dev->bus->self);
1307+
pcie_aspm_pm_state_change(dev->bus->self, locked);
13071308

13081309
return 0;
13091310
}
@@ -1332,10 +1333,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
13321333
pci_walk_bus(bus, __pci_dev_set_current_state, &state);
13331334
}
13341335

1336+
static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked)
1337+
{
1338+
if (!bus)
1339+
return;
1340+
1341+
if (locked)
1342+
pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state);
1343+
else
1344+
pci_walk_bus(bus, __pci_dev_set_current_state, &state);
1345+
}
1346+
13351347
/**
13361348
* pci_set_low_power_state - Put a PCI device into a low-power state.
13371349
* @dev: PCI device to handle.
13381350
* @state: PCI power state (D1, D2, D3hot) to put the device into.
1351+
* @locked: whether pci_bus_sem is held
13391352
*
13401353
* Use the device's PCI_PM_CTRL register to put it into a low-power state.
13411354
*
@@ -1346,7 +1359,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
13461359
* 0 if device already is in the requested state.
13471360
* 0 if device's power state has been successfully changed.
13481361
*/
1349-
static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
1362+
static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
13501363
{
13511364
u16 pmcsr;
13521365

@@ -1400,29 +1413,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
14001413
pci_power_name(state));
14011414

14021415
if (dev->bus->self)
1403-
pcie_aspm_pm_state_change(dev->bus->self);
1416+
pcie_aspm_pm_state_change(dev->bus->self, locked);
14041417

14051418
return 0;
14061419
}
14071420

1408-
/**
1409-
* pci_set_power_state - Set the power state of a PCI device
1410-
* @dev: PCI device to handle.
1411-
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
1412-
*
1413-
* Transition a device to a new power state, using the platform firmware and/or
1414-
* the device's PCI PM registers.
1415-
*
1416-
* RETURN VALUE:
1417-
* -EINVAL if the requested state is invalid.
1418-
* -EIO if device does not support PCI PM or its PM capabilities register has a
1419-
* wrong version, or device doesn't support the requested state.
1420-
* 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
1421-
* 0 if device already is in the requested state.
1422-
* 0 if the transition is to D3 but D3 is not supported.
1423-
* 0 if device's power state has been successfully changed.
1424-
*/
1425-
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
1421+
static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
14261422
{
14271423
int error;
14281424

@@ -1446,7 +1442,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
14461442
return 0;
14471443

14481444
if (state == PCI_D0)
1449-
return pci_set_full_power_state(dev);
1445+
return pci_set_full_power_state(dev, locked);
14501446

14511447
/*
14521448
* This device is quirked not to be put into D3, so don't put it in
@@ -1460,25 +1456,55 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
14601456
* To put the device in D3cold, put it into D3hot in the native
14611457
* way, then put it into D3cold using platform ops.
14621458
*/
1463-
error = pci_set_low_power_state(dev, PCI_D3hot);
1459+
error = pci_set_low_power_state(dev, PCI_D3hot, locked);
14641460

14651461
if (pci_platform_power_transition(dev, PCI_D3cold))
14661462
return error;
14671463

14681464
/* Powering off a bridge may power off the whole hierarchy */
14691465
if (dev->current_state == PCI_D3cold)
1470-
pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
1466+
__pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked);
14711467
} else {
1472-
error = pci_set_low_power_state(dev, state);
1468+
error = pci_set_low_power_state(dev, state, locked);
14731469

14741470
if (pci_platform_power_transition(dev, state))
14751471
return error;
14761472
}
14771473

14781474
return 0;
14791475
}
1476+
1477+
/**
1478+
* pci_set_power_state - Set the power state of a PCI device
1479+
* @dev: PCI device to handle.
1480+
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
1481+
*
1482+
* Transition a device to a new power state, using the platform firmware and/or
1483+
* the device's PCI PM registers.
1484+
*
1485+
* RETURN VALUE:
1486+
* -EINVAL if the requested state is invalid.
1487+
* -EIO if device does not support PCI PM or its PM capabilities register has a
1488+
* wrong version, or device doesn't support the requested state.
1489+
* 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
1490+
* 0 if device already is in the requested state.
1491+
* 0 if the transition is to D3 but D3 is not supported.
1492+
* 0 if device's power state has been successfully changed.
1493+
*/
1494+
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
1495+
{
1496+
return __pci_set_power_state(dev, state, false);
1497+
}
14801498
EXPORT_SYMBOL(pci_set_power_state);
14811499

1500+
int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
1501+
{
1502+
lockdep_assert_held(&pci_bus_sem);
1503+
1504+
return __pci_set_power_state(dev, state, true);
1505+
}
1506+
EXPORT_SYMBOL(pci_set_power_state_locked);
1507+
14821508
#define PCI_EXP_SAVE_REGS 7
14831509

14841510
static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,

drivers/pci/pci.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -556,12 +556,12 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
556556
#ifdef CONFIG_PCIEASPM
557557
void pcie_aspm_init_link_state(struct pci_dev *pdev);
558558
void pcie_aspm_exit_link_state(struct pci_dev *pdev);
559-
void pcie_aspm_pm_state_change(struct pci_dev *pdev);
559+
void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
560560
void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
561561
#else
562562
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
563563
static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
564-
static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
564+
static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { }
565565
static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
566566
#endif
567567

drivers/pci/pcie/aspm.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,8 +1055,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
10551055
up_read(&pci_bus_sem);
10561056
}
10571057

1058-
/* @pdev: the root port or switch downstream port */
1059-
void pcie_aspm_pm_state_change(struct pci_dev *pdev)
1058+
/*
1059+
* @pdev: the root port or switch downstream port
1060+
* @locked: whether pci_bus_sem is held
1061+
*/
1062+
void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
10601063
{
10611064
struct pcie_link_state *link = pdev->link_state;
10621065

@@ -1066,12 +1069,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev)
10661069
* Devices changed PM state, we should recheck if latency
10671070
* meets all functions' requirement
10681071
*/
1069-
down_read(&pci_bus_sem);
1072+
if (!locked)
1073+
down_read(&pci_bus_sem);
10701074
mutex_lock(&aspm_lock);
10711075
pcie_update_aspm_capable(link->root);
10721076
pcie_config_aspm_path(link);
10731077
mutex_unlock(&aspm_lock);
1074-
up_read(&pci_bus_sem);
1078+
if (!locked)
1079+
up_read(&pci_bus_sem);
10751080
}
10761081

10771082
void pcie_aspm_powersave_config_link(struct pci_dev *pdev)

include/linux/pci.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,6 +1383,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev,
13831383
struct pci_saved_state **state);
13841384
int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
13851385
int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
1386+
int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state);
13861387
pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
13871388
bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
13881389
void pci_pme_active(struct pci_dev *dev, bool enable);
@@ -1553,6 +1554,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
15531554

15541555
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
15551556
void *userdata);
1557+
void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
1558+
void *userdata);
15561559
int pci_cfg_space_size(struct pci_dev *dev);
15571560
unsigned char pci_bus_max_busnr(struct pci_bus *bus);
15581561
void pci_setup_bridge(struct pci_bus *bus);
@@ -1884,6 +1887,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; }
18841887
static inline void pci_restore_state(struct pci_dev *dev) { }
18851888
static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
18861889
{ return 0; }
1890+
static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
1891+
{ return 0; }
18871892
static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable)
18881893
{ return 0; }
18891894
static inline pci_power_t pci_choose_state(struct pci_dev *dev,

0 commit comments

Comments
 (0)