"balance_pool --all-boards bvt" crashes repeatedly, but "balance_pool --all-boards cq" seems to work fine.
balance_pool --all-boards bvt
DEBUG:root:API client for gmail disabled. No module named anyjson
Default max broken boards calculated to be 31 for bvt pool
Traceback (most recent call last):
File "/usr/local/bin/balance_pool", line 599, in <module>
main(sys.argv)
File "/usr/local/bin/balance_pool", line 576, in main
if _too_many_broken_boards(inventory, pool, arguments):
File "/usr/local/bin/balance_pool", line 446, in _too_many_broken_boards
if counts.get_broken(pool) != 0]
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/site_utils/lab_inventory.py", line 345, in get_broken
return self._count_pool(_PoolCounts.get_broken, pool)
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/site_utils/lab_inventory.py", line 292, in _count_pool
return get_pool_count(self._pools[pool])
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/site_utils/lab_inventory.py", line 211, in get_broken
return len(self.get_broken_list())
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/site_utils/lab_inventory.py", line 205, in get_broken_list
if h.last_diagnosis()[0] == status_history.BROKEN]
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/server/lib/status_history.py", line 573, in last_diagnosis
self._init_status_task()
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/server/lib/status_history.py", line 502, in _init_status_task
self._afe, self._host.id, self.end_time)
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/server/lib/status_history.py", line 235, in get_status_task
task = afe.get_host_status_task(host_id, query_end)
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/server/frontend.py", line 646, in get_host_status_task
host_id=host_id, end_time=end_time)
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/server/frontend.py", line 103, in run
result = utils.strip_unicode(rpc_call(**dargs))
File "/usr/local/google/home/dgarrett/sand/clean/src/third_party/autotest/files/frontend/afe/json_rpc/proxy.py", line 123, in __call__
raise BuildException(resp['error'])
autotest_lib.frontend.afe.json_rpc.proxy.JSONRPCException: URLError: <urlopen error [Errno 110] Connection timed out>
Traceback (most recent call last):
File "/usr/local/autotest/frontend/afe/json_rpc/serviceHandler.py", line 118, in dispatchRequest
results['result'] = self.invokeServiceEndpoint(meth, args)
File "/usr/local/autotest/frontend/afe/json_rpc/serviceHandler.py", line 158, in invokeServiceEndpoint
return meth(*args)
File "/usr/local/autotest/frontend/afe/rpc_handler.py", line 270, in new_fn
return f(*args, **keyword_args)
File "/usr/local/autotest/frontend/afe/rpc_interface.py", line 1517, in get_host_status_task
host_id=host_id, end_time=end_time)
File "/usr/local/autotest/server/frontend.py", line 103, in run
result = utils.strip_unicode(rpc_call(**dargs))
File "/usr/local/autotest/frontend/afe/json_rpc/proxy.py", line 114, in __call__
respdata = urllib2.urlopen(request).read()
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 404, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 422, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1214, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1184, in do_open
raise URLError(err)
URLError: <urlopen error [Errno 110] Connection timed out>
Comment 1 by jrbarnette@chromium.org, Jan 9 2017
Status: Assigned (was: Untriaged)
There's a problem with the shard for 'heli' (it's down, or at least not serving). You can see it with the following command: `dut-status -b heli`. This also caused the morning lab inventory to fail. The problem doesn't affect balancing the 'cq' pool because there are no 'heli' DUTs in the 'cq' pool.