Skip to content

Commit b3e12d6

Browse files
aikitoriaepictz06
authored and committed
Simplified p2p mod based on the one by geohot
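• Add install.sh wrapper for easier setup (unloads the nvidia modules, rebuilds and installs them, then runs depmod and nvidia-smi)
• Refactor nv-pci.c for cleaner peer-to-peer init: the NVreg_EnableResizableBar regkey check in nv_resize_pcie_bars is commented out, so the early "resizable BAR disabled" return is no longer taken
• Update generated g_kern_bus_nvoc.c to handle new flags: the default-branch entries for kbusGetBar1P2PDmaInfo, kbusCreateP2PMappingForBar1P2P, kbusRemoveP2PMappingForBar1P2P, kbusHasPcieBar1P2PMapping and kbusIsPcieBar1P2PMappingSupported now point at the _GH100 implementations
• Tidy up kernel_bif and kern_bus_gp100 for consistency: p2pOverride now defaults to 0x11 and forceP2PType to NV_REG_STR_RM_FORCE_P2P_TYPE_BAR1P2P; kbusCreateP2PMapping_GP100 and kbusRemoveP2PMapping_GP100 dispatch the _PCIE_BAR1 connection type to the Bar1P2P HAL calls
• Adjust nv_gpu_ops and gmmu_fmt for P2P mapping support: a bar1BusAddr argument (from gpumgrGetGpuPhysFbAddr) is passed to the external-alloc PTE and physical-address builders and used as fabricBaseAddress for peer apertures; peer PTEs are written with the SYS_NONCOH aperture; the original peer/FLA handling is commented out; gmmuFmtPtePhysAddrFld returns fldAddrSysmem for GMMU_APERTURE_PEER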
1 parent 1893c6c commit b3e12d6

File tree

7 files changed

+53
-20
lines changed

7 files changed

+53
-20
lines changed

install.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
2+
sudo rmmod nvidia_drm nvidia_modeset nvidia_uvm nvidia
3+
set -e
4+
make modules -j$(nproc)
5+
sudo make modules_install -j$(nproc)
6+
sudo depmod
7+
nvidia-smi

kernel-open/nvidia/nv-pci.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,11 @@ static int nv_resize_pcie_bars(struct pci_dev *pci_dev) {
187187
struct pci_host_bridge *host;
188188
#endif
189189

190-
if (NVreg_EnableResizableBar == 0)
190+
/*if (NVreg_EnableResizableBar == 0)
191191
{
192192
nv_printf(NV_DBG_INFO, "NVRM: resizable BAR disabled by regkey, skipping\n");
193193
return 0;
194-
}
194+
}*/
195195

196196
// Check if BAR1 has PCIe rebar capabilities
197197
sizes = pci_rebar_get_possible_sizes(pci_dev, NV_GPU_BAR1);

src/nvidia/generated/g_kern_bus_nvoc.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,7 @@ static void __nvoc_init_funcTable_KernelBus_1(KernelBus *pThis, RmHalspecOwner *
752752
// default
753753
else
754754
{
755-
pThis->__kbusGetBar1P2PDmaInfo__ = &kbusGetBar1P2PDmaInfo_395e98;
755+
pThis->__kbusGetBar1P2PDmaInfo__ = &kbusGetBar1P2PDmaInfo_GH100;
756756
}
757757

758758
// kbusCreateP2PMappingForBar1P2P -- halified (2 hals) body
@@ -764,7 +764,7 @@ static void __nvoc_init_funcTable_KernelBus_1(KernelBus *pThis, RmHalspecOwner *
764764
// default
765765
else
766766
{
767-
pThis->__kbusCreateP2PMappingForBar1P2P__ = &kbusCreateP2PMappingForBar1P2P_395e98;
767+
pThis->__kbusCreateP2PMappingForBar1P2P__ = &kbusCreateP2PMappingForBar1P2P_GH100;
768768
}
769769

770770
// kbusRemoveP2PMappingForBar1P2P -- halified (2 hals) body
@@ -776,7 +776,7 @@ static void __nvoc_init_funcTable_KernelBus_1(KernelBus *pThis, RmHalspecOwner *
776776
// default
777777
else
778778
{
779-
pThis->__kbusRemoveP2PMappingForBar1P2P__ = &kbusRemoveP2PMappingForBar1P2P_395e98;
779+
pThis->__kbusRemoveP2PMappingForBar1P2P__ = &kbusRemoveP2PMappingForBar1P2P_GH100;
780780
}
781781

782782
// kbusHasPcieBar1P2PMapping -- halified (2 hals) body
@@ -788,7 +788,7 @@ static void __nvoc_init_funcTable_KernelBus_1(KernelBus *pThis, RmHalspecOwner *
788788
// default
789789
else
790790
{
791-
pThis->__kbusHasPcieBar1P2PMapping__ = &kbusHasPcieBar1P2PMapping_d69453;
791+
pThis->__kbusHasPcieBar1P2PMapping__ = &kbusHasPcieBar1P2PMapping_GH100;
792792
}
793793

794794
// kbusIsPcieBar1P2PMappingSupported -- halified (2 hals) body
@@ -800,7 +800,7 @@ static void __nvoc_init_funcTable_KernelBus_1(KernelBus *pThis, RmHalspecOwner *
800800
// default
801801
else
802802
{
803-
pThis->__kbusIsPcieBar1P2PMappingSupported__ = &kbusIsPcieBar1P2PMappingSupported_d69453;
803+
pThis->__kbusIsPcieBar1P2PMappingSupported__ = &kbusIsPcieBar1P2PMappingSupported_GH100;
804804
}
805805

806806
// kbusCheckFlaSupportedAndInit -- halified (2 hals) body

src/nvidia/src/kernel/gpu/bif/kernel_bif.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ _kbifInitRegistryOverrides
772772
NvU32 data32;
773773

774774
// P2P Override
775-
pKernelBif->p2pOverride = BIF_P2P_NOT_OVERRIDEN;
775+
pKernelBif->p2pOverride = 0x11;
776776
if (osReadRegistryDword(pGpu, NV_REG_STR_CL_FORCE_P2P, &data32) == NV_OK)
777777
{
778778
pKernelBif->p2pOverride = data32;
@@ -781,7 +781,7 @@ _kbifInitRegistryOverrides
781781
}
782782

783783
// P2P force type override
784-
pKernelBif->forceP2PType = NV_REG_STR_RM_FORCE_P2P_TYPE_DEFAULT;
784+
pKernelBif->forceP2PType = NV_REG_STR_RM_FORCE_P2P_TYPE_BAR1P2P;
785785
if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FORCE_P2P_TYPE, &data32) == NV_OK &&
786786
(data32 <= NV_REG_STR_RM_FORCE_P2P_TYPE_MAX))
787787
{

src/nvidia/src/kernel/gpu/bus/arch/pascal/kern_bus_gp100.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ kbusCreateP2PMapping_GP100
6565
return kbusCreateP2PMappingForNvlink_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
6666
}
6767

68+
if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE_BAR1, attributes))
69+
{
70+
return kbusCreateP2PMappingForBar1P2P_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
71+
}
72+
6873
if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE, attributes))
6974
{
7075
return kbusCreateP2PMappingForMailbox_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
@@ -632,6 +637,11 @@ kbusRemoveP2PMapping_GP100
632637
return kbusRemoveP2PMappingForNvlink_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
633638
}
634639

640+
if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE_BAR1, attributes))
641+
{
642+
return kbusRemoveP2PMappingForBar1P2P_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
643+
}
644+
635645
if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE, attributes))
636646
{
637647
return kbusRemoveP2PMappingForMailbox_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);

src/nvidia/src/kernel/rmapi/nv_gpu_ops.c

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3669,7 +3669,8 @@ nvGpuOpsBuildExternalAllocPtes
36693669
NvBool isIndirectPeerSupported,
36703670
NvBool isPeerSupported,
36713671
NvU32 peerId,
3672-
gpuExternalMappingInfo *pGpuExternalMappingInfo
3672+
gpuExternalMappingInfo *pGpuExternalMappingInfo,
3673+
RmPhysAddr bar1BusAddr
36733674
)
36743675
{
36753676
NV_STATUS status = NV_OK;
@@ -3864,7 +3865,11 @@ nvGpuOpsBuildExternalAllocPtes
38643865
if (nvFieldIsValid32(&pPteFmt->fldAtomicDisable.desc))
38653866
nvFieldSetBool(&pPteFmt->fldAtomicDisable, !atomic, pte.v8);
38663867

3868+
if (aperture == GMMU_APERTURE_PEER) {
3869+
gmmuFieldSetAperture(&pPteFmt->fldAperture, GMMU_APERTURE_SYS_NONCOH, pte.v8);
3870+
} else {
38673871
gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);
3872+
}
38683873

38693874
if (!isCompressedKind)
38703875
{
@@ -3875,7 +3880,11 @@ nvGpuOpsBuildExternalAllocPtes
38753880
}
38763881
}
38773882

3878-
if (aperture == GMMU_APERTURE_PEER)
3883+
if (aperture == GMMU_APERTURE_PEER) {
3884+
fabricBaseAddress = bar1BusAddr;
3885+
}
3886+
3887+
/*if (aperture == GMMU_APERTURE_PEER)
38793888
{
38803889
FlaMemory* pFlaMemory = dynamicCast(pMemory, FlaMemory);
38813890
nvFieldSet32(&pPteFmt->fldPeerIndex, peerId, pte.v8);
@@ -3914,7 +3923,7 @@ nvGpuOpsBuildExternalAllocPtes
39143923
}
39153924
}
39163925
}
3917-
}
3926+
}*/
39183927

39193928
//
39203929
// Both memdescGetPhysAddr() and kgmmuEncodePhysAddr() have pretty high overhead.
@@ -4072,7 +4081,8 @@ nvGpuOpsBuildExternalAllocPhysAddrs
40724081
NvBool isIndirectPeerSupported,
40734082
NvBool isPeerSupported,
40744083
NvU32 peerId,
4075-
UvmGpuExternalPhysAddrInfo *pGpuExternalPhysAddrInfo
4084+
UvmGpuExternalPhysAddrInfo *pGpuExternalPhysAddrInfo,
4085+
RmPhysAddr bar1BusAddr
40764086
)
40774087
{
40784088
NV_STATUS status = NV_OK;
@@ -4145,7 +4155,11 @@ nvGpuOpsBuildExternalAllocPhysAddrs
41454155
return NV_ERR_BUFFER_TOO_SMALL;
41464156

41474157

4148-
if (aperture == GMMU_APERTURE_PEER)
4158+
if (aperture == GMMU_APERTURE_PEER) {
4159+
fabricBaseAddress = bar1BusAddr;
4160+
}
4161+
4162+
/*if (aperture == GMMU_APERTURE_PEER)
41494163
{
41504164
FlaMemory* pFlaMemory = dynamicCast(pMemory, FlaMemory);
41514165
@@ -4183,7 +4197,7 @@ nvGpuOpsBuildExternalAllocPhysAddrs
41834197
}
41844198
}
41854199
}
4186-
}
4200+
}*/
41874201

41884202
//
41894203
// Both memdescGetPhysAddr() and kgmmuEncodePhysAddr() have pretty high overhead.
@@ -4235,6 +4249,7 @@ NV_STATUS nvGpuOpsGetExternalAllocPtesOrPhysAddrs(struct gpuAddressSpace *vaSpac
42354249
Memory *pMemory = NULL;
42364250
PMEMORY_DESCRIPTOR pMemDesc = NULL;
42374251
OBJGPU *pMappingGpu = NULL;
4252+
RmPhysAddr bar1BusAddr = 0;
42384253
NvU32 peerId = 0;
42394254
NvBool isSliSupported = NV_FALSE;
42404255
NvBool isPeerSupported = NV_FALSE;
@@ -4386,6 +4401,8 @@ NV_STATUS nvGpuOpsGetExternalAllocPtesOrPhysAddrs(struct gpuAddressSpace *vaSpac
43864401
&peerId);
43874402
if (status != NV_OK)
43884403
goto freeGpaMemdesc;
4404+
4405+
bar1BusAddr = gpumgrGetGpuPhysFbAddr(pAdjustedMemDesc->pGpu);
43894406
}
43904407

43914408
//
@@ -4465,14 +4482,14 @@ NV_STATUS nvGpuOpsGetExternalAllocPtesOrPhysAddrs(struct gpuAddressSpace *vaSpac
44654482
{
44664483
status = nvGpuOpsBuildExternalAllocPtes(pVAS, pMappingGpu, pAdjustedMemDesc, pMemory, offset, size,
44674484
isIndirectPeerSupported, isPeerSupported, peerId,
4468-
pGpuExternalMappingInfo);
4485+
pGpuExternalMappingInfo, bar1BusAddr);
44694486
}
44704487

44714488
if (pGpuExternalPhysAddrInfo != NULL)
44724489
{
44734490
status = nvGpuOpsBuildExternalAllocPhysAddrs(pVAS, pMappingGpu, pAdjustedMemDesc, pMemory, offset, size,
44744491
isIndirectPeerSupported, isPeerSupported, peerId,
4475-
pGpuExternalPhysAddrInfo);
4492+
pGpuExternalPhysAddrInfo, bar1BusAddr);
44764493
}
44774494

44784495
freeGpaMemdesc:
@@ -10309,7 +10326,7 @@ NV_STATUS nvGpuOpsGetChannelResourcePtes(struct gpuAddressSpace *vaSpace,
1030910326

1031010327
status = nvGpuOpsBuildExternalAllocPtes(pVAS, pMappingGpu, pMemDesc, NULL,
1031110328
offset, size, NV_FALSE, NV_FALSE,
10312-
0, pGpuExternalMappingInfo);
10329+
0, pGpuExternalMappingInfo, 0);
1031310330

1031410331
_nvGpuOpsLocksRelease(&acquiredLocks);
1031510332
threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);

src/nvidia/src/libraries/mmu/gmmu_fmt.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,8 @@ gmmuFmtPtePhysAddrFld
124124
{
125125
case GMMU_APERTURE_SYS_COH:
126126
case GMMU_APERTURE_SYS_NONCOH:
127-
return &pPte->fldAddrSysmem;
128127
case GMMU_APERTURE_PEER:
129-
return &pPte->fldAddrPeer;
128+
return &pPte->fldAddrSysmem;
130129
case GMMU_APERTURE_VIDEO:
131130
return &pPte->fldAddrVidmem;
132131
default:

0 commit comments

Comments
 (0)