From 9480a013c406c6e7603b54baa231f8211e92c4ed Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 13 Jun 2026 11:00:38 +0200 Subject: [PATCH] ITS: GPU: fix perPrimaryVertexProcessing When using the GPU stack allocator with perPrimaryVertexProcessing, we cannot push to the stack in initialiseTimeFrame, since it is only called once while the chain from computeLayerTracklets to findRoads is called once per vertex. In findRoads we tried to clear a tag from a previous iteration while the stack was empty, leading to a fatal error. This fixes that and avoids creating the tracklet LUT buffer on each pass. --- .../ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index a19759d0577ec..c31e5c274235d 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -52,8 +52,6 @@ void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) if (this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass] || this->mTrkParams[iteration].PassFlags[IterationStep::UseUPCMask]) { mTimeFrameGPU->loadROFCutMask(iteration); } - // push every create artefact on the stack - mTimeFrameGPU->pushMemoryStack(iteration); } template @@ -68,8 +66,9 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i { const auto topology = mTimeFrameGPU->getDeviceTrackingTopologyView(); const auto hostTopology = mTimeFrameGPU->getTrackingTopologyView(); + const bool loadFirstPassData = this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass] && iVertex <= 0; // load data only on first pass and first vertex for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NLayers; ++iLayer) { - if (this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass]) { + if (loadFirstPassData) { 
mTimeFrameGPU->createUsedClustersDevice(iLayer); mTimeFrameGPU->loadClustersDevice(iLayer); mTimeFrameGPU->loadClustersIndexTables(iLayer); @@ -78,9 +77,16 @@ void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int i mTimeFrameGPU->recordEvent(iLayer); } + for (int linkId{0}; linkId < hostTopology.nLinks; ++linkId) { + mTimeFrameGPU->createTrackletsLUTDevice(loadFirstPassData, linkId); // on first pass allocates, then only clears memory + } + + // Stack allocations created from trackleting through road finding are scoped to one tracker pass. + // With per-primary-vertex processing, the chain is called once per vertex while initialisation is only done once. + mTimeFrameGPU->pushMemoryStack(iteration); + for (int linkId{0}; linkId < hostTopology.nLinks; ++linkId) { const auto link = hostTopology.getLink(linkId); - mTimeFrameGPU->createTrackletsLUTDevice(this->mTrkParams[iteration].PassFlags[IterationStep::FirstPass], linkId); mTimeFrameGPU->waitEvent(linkId, link.fromLayer); mTimeFrameGPU->waitEvent(linkId, link.toLayer); countTrackletsInROFsHandler(mTimeFrameGPU->getDeviceIndexTableUtils(),