Potential memory leak in gpu::gles2::GLES2DecoderImpl |
||||||||
Issue description
A potential memory leak was detected in the GPU process.
Process[84644] GPU Process:
malloc: 4 potential leaks found.
These results are produced by running a Chrome extension that randomly browses the web for multiple days. Chrome is running with native heap profiling activated, and memory allocations are tracked.
The attached stack frames show where the leaking memory was allocated.
,
Aug 16 2017
[
[
356064,
31333632,
{
"gpu/ipc/service": 356064
},
[
"base::debug::StackTrace::StackTrace",
"base::trace_event::AllocationContextTracker::GetContextSnapshot",
"base::trace_event::`anonymous namespace'::HookAlloc",
"malloc",
"operator new",
"gpu::gles2::BufferManager::CreateBuffer",
"gpu::gles2::GLES2DecoderImpl::GenBuffersHelper",
"gpu::gles2::GLES2DecoderImpl::HandleGenBuffersImmediate",
"gpu::gles2::GLES2DecoderImpl::DoCommandsImpl<0>",
"gpu::CommandBufferService::Flush",
"gpu::GpuCommandBufferStub::OnAsyncFlush",
"IPC::MessageT<GpuCommandBufferMsg_AsyncFlush_Meta, std::tuple<int, unsigned int, std::vector<ui::LatencyInfo, std::allocator<ui::LatencyInfo> >, std::vector<gpu::SyncToken, std::allocator<gpu::SyncToken> > >, void>::Dispatch<gpu::GpuCommandBufferStub,gpu::GpuCommandBufferStub,void,void (gpu::GpuCommandBufferStub::*)(int, unsigned int, const std::vector<ui::LatencyInfo,std::allocator<ui::LatencyInfo> > &, const std::vector<gpu::SyncToken,std::allocator<gpu::SyncToken> > &)>",
"gpu::GpuCommandBufferStub::OnMessageReceived",
"IPC::MessageRouter::RouteMessage",
"gpu::GpuChannel::HandleMessageHelper",
"gpu::GpuChannel::HandleMessage",
"gpu::Scheduler::RunNextTask",
"base::debug::TaskAnnotator::RunTask",
"base::MessageLoop::RunTask",
"base::MessageLoop::DeferOrRunPendingTask",
"base::MessageLoop::DoWork",
"base::MessagePumpDefault::Run",
"base::RunLoop::Run",
"content::GpuMain",
"content::RunNamedProcessTypeMain",
"content::ContentMainRunnerImpl::Run",
"service_manager::Main",
"content::ContentMain",
"ChromeMain",
"MainDllLoader::Launch",
"wWinMain",
"__scrt_common_main_seh",
"<kernel32.dll> + 0x159bdL",
"<ntdll.dll> + 0x2a2e1L",
"[Thread: 6564]"
]
],
[
356064,
17091072,
{
"gpu/ipc/service": 356064
},
[
"base::debug::StackTrace::StackTrace",
"base::trace_event::AllocationContextTracker::GetContextSnapshot",
"base::trace_event::`anonymous namespace'::HookAlloc",
"malloc",
"operator new",
"gpu::gles2::Buffer::Buffer",
"gpu::gles2::BufferManager::CreateBuffer",
"gpu::gles2::GLES2DecoderImpl::GenBuffersHelper",
"gpu::gles2::GLES2DecoderImpl::HandleGenBuffersImmediate",
"gpu::gles2::GLES2DecoderImpl::DoCommandsImpl<0>",
"gpu::CommandBufferService::Flush",
"gpu::GpuCommandBufferStub::OnAsyncFlush",
"IPC::MessageT<GpuCommandBufferMsg_AsyncFlush_Meta, std::tuple<int, unsigned int, std::vector<ui::LatencyInfo, std::allocator<ui::LatencyInfo> >, std::vector<gpu::SyncToken, std::allocator<gpu::SyncToken> > >, void>::Dispatch<gpu::GpuCommandBufferStub,gpu::GpuCommandBufferStub,void,void (gpu::GpuCommandBufferStub::*)(int, unsigned int, const std::vector<ui::LatencyInfo,std::allocator<ui::LatencyInfo> > &, const std::vector<gpu::SyncToken,std::allocator<gpu::SyncToken> > &)>",
"gpu::GpuCommandBufferStub::OnMessageReceived",
"IPC::MessageRouter::RouteMessage",
"gpu::GpuChannel::HandleMessageHelper",
"gpu::GpuChannel::HandleMessage",
"gpu::Scheduler::RunNextTask",
"base::debug::TaskAnnotator::RunTask",
"base::MessageLoop::RunTask",
"base::MessageLoop::DeferOrRunPendingTask",
"base::MessageLoop::DoWork",
"base::MessagePumpDefault::Run",
"base::RunLoop::Run",
"content::GpuMain",
"content::RunNamedProcessTypeMain",
"content::ContentMainRunnerImpl::Run",
"service_manager::Main",
"content::ContentMain",
"ChromeMain",
"MainDllLoader::Launch",
"wWinMain",
"__scrt_common_main_seh",
"<kernel32.dll> + 0x159bdL",
"<ntdll.dll> + 0x2a2e1L",
"[Thread: 6564]"
]
],
[
356064,
11394048,
{
"gpu/ipc/service": 356064
},
[
"base::debug::StackTrace::StackTrace",
"base::trace_event::AllocationContextTracker::GetContextSnapshot",
"base::trace_event::`anonymous namespace'::HookAlloc",
"malloc",
"operator new",
"std::list<std::pair<const unsigned int, scoped_refptr<gpu::gles2::Shader> >, std::allocator<std::pair<const unsigned int, scoped_refptr<gpu::gles2::Shader> > > >::_Insert<std::pair<unsigned int,scoped_refptr<gpu::gles2::Shader> > >",
"gpu::gles2::BufferManager::CreateBuffer",
"gpu::gles2::GLES2DecoderImpl::GenBuffersHelper",
"gpu::gles2::GLES2DecoderImpl::HandleGenBuffersImmediate",
"gpu::gles2::GLES2DecoderImpl::DoCommandsImpl<0>",
"gpu::CommandBufferService::Flush",
"gpu::GpuCommandBufferStub::OnAsyncFlush",
"IPC::MessageT<GpuCommandBufferMsg_AsyncFlush_Meta, std::tuple<int, unsigned int, std::vector<ui::LatencyInfo, std::allocator<ui::LatencyInfo> >, std::vector<gpu::SyncToken, std::allocator<gpu::SyncToken> > >, void>::Dispatch<gpu::GpuCommandBufferStub,gpu::GpuCommandBufferStub,void,void (gpu::GpuCommandBufferStub::*)(int, unsigned int, const std::vector<ui::LatencyInfo,std::allocator<ui::LatencyInfo> > &, const std::vector<gpu::SyncToken,std::allocator<gpu::SyncToken> > &)>",
"gpu::GpuCommandBufferStub::OnMessageReceived",
"IPC::MessageRouter::RouteMessage",
"gpu::GpuChannel::HandleMessageHelper",
"gpu::GpuChannel::HandleMessage",
"gpu::Scheduler::RunNextTask",
"base::debug::TaskAnnotator::RunTask",
"base::MessageLoop::RunTask",
"base::MessageLoop::DeferOrRunPendingTask",
"base::MessageLoop::DoWork",
"base::MessagePumpDefault::Run",
"base::RunLoop::Run",
"content::GpuMain",
"content::RunNamedProcessTypeMain",
"content::ContentMainRunnerImpl::Run",
"service_manager::Main",
"content::ContentMain",
"ChromeMain",
"MainDllLoader::Launch",
"wWinMain",
"__scrt_common_main_seh",
"<kernel32.dll> + 0x159bdL",
"<ntdll.dll> + 0x2a2e1L",
"[Thread: 6564]"
]
],
[
178030,
2136552,
{
"gpu/ipc/service": 178030
},
[
"base::debug::StackTrace::StackTrace",
"base::trace_event::AllocationContextTracker::GetContextSnapshot",
"base::trace_event::`anonymous namespace'::HookAlloc",
"malloc",
"operator new",
"std::vector<char, std::allocator<char> >::_Insert<std::_String_const_iterator<std::_String_val<std::_Simple_types<char> > > >",
"gpu::gles2::Buffer::StageShadow",
"gpu::gles2::BufferManager::DoBufferData",
"gpu::gles2::BufferManager::ValidateAndDoBufferData",
"gpu::gles2::GLES2DecoderImpl::HandleBufferData",
"gpu::gles2::GLES2DecoderImpl::DoCommandsImpl<0>",
"gpu::CommandBufferService::Flush",
"gpu::GpuCommandBufferStub::OnAsyncFlush",
"IPC::MessageT<GpuCommandBufferMsg_AsyncFlush_Meta, std::tuple<int, unsigned int, std::vector<ui::LatencyInfo, std::allocator<ui::LatencyInfo> >, std::vector<gpu::SyncToken, std::allocator<gpu::SyncToken> > >, void>::Dispatch<gpu::GpuCommandBufferStub,gpu::GpuCommandBufferStub,void,void (gpu::GpuCommandBufferStub::*)(int, unsigned int, const std::vector<ui::LatencyInfo,std::allocator<ui::LatencyInfo> > &, const std::vector<gpu::SyncToken,std::allocator<gpu::SyncToken> > &)>",
"gpu::GpuCommandBufferStub::OnMessageReceived",
"IPC::MessageRouter::RouteMessage",
"gpu::GpuChannel::HandleMessageHelper",
"gpu::GpuChannel::HandleMessage",
"gpu::Scheduler::RunNextTask",
"base::debug::TaskAnnotator::RunTask",
"base::MessageLoop::RunTask",
"base::MessageLoop::DeferOrRunPendingTask",
"base::MessageLoop::DoWork",
"base::MessagePumpDefault::Run",
"base::RunLoop::Run",
"content::GpuMain",
"content::RunNamedProcessTypeMain",
"content::ContentMainRunnerImpl::Run",
"service_manager::Main",
"content::ContentMain",
"ChromeMain",
"MainDllLoader::Launch",
"wWinMain",
"__scrt_common_main_seh",
"<kernel32.dll> + 0x159bdL",
"<ntdll.dll> + 0x2a2e1L",
"[Thread: 6564]"
]
]
]
,
Aug 16 2017
,
Aug 16 2017
piman@ can you take a look or re-assign.
,
Aug 16 2017
2 questions: 1- what determines if a given allocation is a leak? 2- how does one reproduce this? These individual stack traces are not the leak per se, in that they represent a GPU resource which is tracked by the GPU infrastructure and will be destroyed at the latest as the corresponding context group is destroyed. However it is likely that this would be caused by a client itself leaking its resources, but without a repro it's not possible to determine which one / what causes this resource allocation / leak.
,
Aug 17 2017
,
Aug 17 2017
To clarify:
"""
356064,
31333632,
{
"gpu/ipc/service": 356064
},
[
"base::debug::StackTrace::StackTrace",
"base::trace_event::AllocationContextTracker::GetContextSnapshot",
"base::trace_event::`anonymous namespace'::HookAlloc",
"malloc",
"operator new",
"gpu::gles2::BufferManager::CreateBuffer",
"gpu::gles2::GLES2DecoderImpl::GenBuffersHelper",
"gpu::gles2::GLES2DecoderImpl::HandleGenBuffersImmediate",
"gpu::gles2::GLES2DecoderImpl::DoCommandsImpl<0>",
"gpu::CommandBufferService::Flush",
"gpu::GpuCommandBufferStub::OnAsyncFlush",
"IPC::MessageT<GpuCommandBufferMsg_AsyncFlush_Meta, std::tuple<int, unsigned int, std::vector<ui::LatencyInfo, std::allocator<ui::LatencyInfo> >, std::vector<gpu::SyncToken, std::allocator<gpu::SyncToken> > >, void>::Dispatch<gpu::GpuCommandBufferStub,gpu::GpuCommandBufferStub,void,void (gpu::GpuCommandBufferStub::*)(int, unsigned int, const std::vector<ui::LatencyInfo,std::allocator<ui::LatencyInfo> > &, const std::vector<gpu::SyncToken,std::allocator<gpu::SyncToken> > &)>",
"gpu::GpuCommandBufferStub::OnMessageReceived",
"IPC::MessageRouter::RouteMessage",
"gpu::GpuChannel::HandleMessageHelper",
"gpu::GpuChannel::HandleMessage",
"gpu::Scheduler::RunNextTask",
"base::debug::TaskAnnotator::RunTask",
"base::MessageLoop::RunTask",
"base::MessageLoop::DeferOrRunPendingTask",
"base::MessageLoop::DoWork",
"base::MessagePumpDefault::Run",
"base::RunLoop::Run",
"content::GpuMain",
"content::RunNamedProcessTypeMain",
"content::ContentMainRunnerImpl::Run",
"service_manager::Main",
"content::ContentMain",
"ChromeMain",
"MainDllLoader::Launch",
"wWinMain",
"__scrt_common_main_seh",
"<kernel32.dll> + 0x159bdL",
"<ntdll.dll> + 0x2a2e1L",
"[Thread: 6564]"
]
"""
Etienne ran a browser for a week, with an extension to perform many navigations. [Etienne, can you share it?]
We record every single call to malloc [with its corresponding stack trace], and keep track of all live objects. At the end of a week, we group the live allocations by identical stack trace. In this case, the first number refers to the number of live objects with this stack trace, and the second refers to the total memory usage of those objects.
In this case, it looks like we're leaking Buffer objects. [356064 of them!]
Etienne: It would be helpful if you wrote some type of shared doc with this information that you can link from all the bugs that you file. [e.g. how to interpret the stack traces, repro steps, etc.]
,
Aug 17 2017
To clarify: At the end of the experiment, every running renderer was closed. The only remaining renderer was the original about:blank. So, I wonder what can hold these resources? For the repro, it's an extension that is running a page-cycler. I'll add it to catapult experimental after a cleanup.
,
Aug 17 2017
Thanks for the details! Having the extension would be useful. One more quick question: does it "browse the web" within a single tab, or open/closes tabs? The numbers make me think that the client is the browser (random navigation would likely create/destroy new renderers, therefore should wipe the buffers), but hard to tell which context. It's particularly interesting that there are almost exactly twice as many leaked gpu::gles2::Buffer as there are leaked shadow buffers (gpu::gles2::Buffer::StageShadow stack). This suggests that there is a code pattern somewhere that creates 2 buffers (that it leaks) and puts data into only one of them. Also given the sizes of the shadow buffer, it's likely that this pattern uses 12-byte buffers (2136552/178030), assuming those numbers don't include malloc/new[] overhead.
,
Aug 17 2017
I'm almost positive that the 12-byte buffers are https://cs.chromium.org/chromium/src/components/viz/service/display/dynamic_geometry_binding.cc?q=DynamicGeometryBinding&sq=package:chromium&dr=CSs&l=25 which are obviously leaked. We should be able to fix this one easily. I'll take this because I don't think awoloszyn works on Chrome any more. I'm not sure if they explain all leaks (both of these buffers should allocate 12 bytes, it sounds like there's another 2 buffers that never allocate). Also, it suggests that we create that many GLRenderers, which is a ton - are we creating a new window every time in this experiment? Please share the extension so that we can verify / investigate further.
,
Aug 17 2017
Yes, we are creating tons (really tons) of renderers. But, we are keeping the same windows. IIRC, that experiment navigated through about 250K sites. Current state of the extension is attached.
,
Aug 17 2017
Thanks! It's odd, because we shouldn't be creating/destroying GLRenderers (what creates the DynamicGeometryBindings) for every tab. I'll see if I can repro.
,
Aug 18 2017
https://chromium-review.googlesource.com/c/620188 should fix the DynamicGeometryBinding issue. However the buffers are 96 and 12 bytes respectively, so it doesn't look like they account for the whole leak.
,
Aug 18 2017
The following revision refers to this bug: https://chromium.googlesource.com/chromium/src.git/+/23d5dc2fa626c292d5c374aca8be13e28f57345e commit 23d5dc2fa626c292d5c374aca8be13e28f57345e Author: Antoine Labour <piman@chromium.org> Date: Fri Aug 18 19:24:09 2017 Properly release resources in DynamicGeometryBinding Buffers were never deleted, causing a slow leak every time a new GLRenderer was created. This CL fixes it and adds a test to make sure GLRenderer doesn't leak GL resources. Bug: 752945 Change-Id: I48d3cc08acf715b1004b074a52dba09711bcbcaa Reviewed-on: https://chromium-review.googlesource.com/620188 Reviewed-by: enne <enne@chromium.org> Commit-Queue: enne <enne@chromium.org> Commit-Queue: Antoine Labour <piman@chromium.org> Cr-Commit-Position: refs/heads/master@{#495644} [modify] https://crrev.com/23d5dc2fa626c292d5c374aca8be13e28f57345e/components/viz/service/display/dynamic_geometry_binding.cc [modify] https://crrev.com/23d5dc2fa626c292d5c374aca8be13e28f57345e/components/viz/service/display/dynamic_geometry_binding.h [modify] https://crrev.com/23d5dc2fa626c292d5c374aca8be13e28f57345e/components/viz/service/display/geometry_binding.h [modify] https://crrev.com/23d5dc2fa626c292d5c374aca8be13e28f57345e/components/viz/service/display/gl_renderer_unittest.cc
,
Sep 5 2017
This seems to be fixed. The leaking stack frames didn't show up in the last experiment. Thanks!
,
Sep 5 2017
|
||||||||
►
Sign in to add a comment |
||||||||
Comment 1 by etienneb@chromium.org
, Aug 16 201715.7 KB
15.7 KB View Download