Many paladins failing with "Suite job: FAIL: retry exception (function="_call()"), timeout = 180s" |
|||||
Issue description: See from the current CQ run: 1. whirlwind-paladin: https://cros-goldeneye.corp.google.com/chromeos/healthmonitoring/buildDetails?builderName=whirlwind-paladin&buildNumber=12115 2. elm-paladin: https://cros-goldeneye.corp.google.com/chromeos/healthmonitoring/buildDetails?builderName=elm-paladin&buildNumber=6919
,
Aug 7
jkop@, can you please triage this?
,
Aug 7
On it, push to prod should get yesterday's revert to actually fix this.
,
Aug 7
This is still happening on veyron_minnie-tot-chrome-pfq-informational: https://cros-goldeneye.corp.google.com/chromeos/healthmonitoring/buildDetails?buildbucketId=8938844594838024544
,
Aug 8
That started at 7:00 AM, four hours before the push to prod. If it doesn't recur on a build that started after 11:00 AM today, then this will be reopened.
,
Aug 8
s/doesn't recur/recurs/ |
|||||
►
Sign in to add a comment |
|||||
Comment 1 by kirtika@chromium.org
, Aug 7: Common exception in all cases is: 08/07 09:27:57.588 ERROR| server_job:0811| Exception escaped control file, job aborting: Traceback (most recent call last): File "/usr/local/autotest/server/server_job.py", line 803, in run self._execute_code(server_control_file, namespace) File "/usr/local/autotest/server/server_job.py", line 1326, in _execute_code execfile(code_file, namespace, namespace) File "/usr/local/autotest/results/224775161-chromeos-test/hostless/control.srv", line 92, in <module> dynamic_suite.reimage_and_run(**args_dict) File "/usr/local/autotest/server/cros/dynamic_suite/dynamic_suite.py", line 513, in reimage_and_run _perform_reimage_and_run(suite_spec, afe, tko, suite_job_id=my_job_id) File "/usr/local/autotest/server/cros/dynamic_suite/dynamic_suite.py", line 557, in _perform_reimage_and_run child_dependencies=spec.child_dependencies, File "/usr/local/autotest/server/cros/dynamic_suite/suite.py", line 1384, in create_from_predicates name, builds, board, cf_getter, run_prod_code, **dargs) File "/usr/local/autotest/server/cros/dynamic_suite/suite.py", line 1508, in __init__ test_args=test_args, File "/usr/local/autotest/server/cros/dynamic_suite/suite.py", line 774, in find_and_parse_tests tests = retriever.retrieve_for_suite(suite_name) File "/usr/local/autotest/server/cros/dynamic_suite/suite.py", line 546, in retrieve_for_suite self._test_args) File "/usr/local/autotest/server/cros/dynamic_suite/suite_common.py", line 330, in retrieve_for_suite test_args=test_args) File "/usr/local/autotest/server/cros/dynamic_suite/suite_common.py", line 278, in parse_cf_text_many for path, text in control_file_texts: File "/usr/local/autotest/server/cros/dynamic_suite/suite_common.py", line 208, in _get_cf_texts_for_suite_batched suite_info = cf_getter.get_suite_info(suite_name=suite_name) File "/usr/local/autotest/server/cros/dynamic_suite/control_file_getter.py", line 342, in get_suite_info file_contents = self._list_suite_controls(suite_name=suite_name) 
File "/usr/local/autotest/server/cros/dynamic_suite/control_file_getter.py", line 328, in _list_suite_controls return cache_client.list_suite_controls(self._build, suite_name) File "/usr/local/autotest/client/common_lib/cros/gs_cache_client.py", line 180, in list_suite_controls return self._list_suite_controls(build, suite_name) File "/usr/local/autotest/client/common_lib/cros/gs_cache_client.py", line 198, in _list_suite_controls test_suites, map_file_name) File "/usr/local/autotest/client/common_lib/cros/gs_cache_client.py", line 136, in extract rsp_content = self._call('extract', bucket, archive, {'file': files}) File "/usr/local/autotest/client/common_lib/cros/retry.py", line 246, in func_retry raise error.TimeoutException(exception_message) TimeoutException: retry exception (function="_call()"), timeout = 180s 08/07 09:27:57.592 INFO | server_job:0216| INFO ---- ---- timestamp=1533659277 job_abort_reason=retry exception (function="_call()"), timeout = 180s localtime=Aug 07 09:27:57 retry exception (function="_call()"), timeout = 180s 08/07 09:27:57.594 WARNI| subcommand:0085| parallel_simple was called with an empty arglist, did you forget to pass in a list of machines? 
08/07 09:27:57.595 ERROR| traceback:0013| Traceback (most recent call last): 08/07 09:27:57.596 ERROR| traceback:0013| File "/usr/local/autotest/server/autoserv", line 604, in run_autoserv 08/07 09:27:57.596 ERROR| traceback:0013| use_packaging=(not no_use_packaging)) 08/07 09:27:57.596 ERROR| traceback:0013| File "/usr/local/autotest/server/server_job.py", line 803, in run 08/07 09:27:57.597 ERROR| traceback:0013| self._execute_code(server_control_file, namespace) 08/07 09:27:57.597 ERROR| traceback:0013| File "/usr/local/autotest/server/server_job.py", line 1326, in _execute_code 08/07 09:27:57.597 ERROR| traceback:0013| execfile(code_file, namespace, namespace) 08/07 09:27:57.597 ERROR| traceback:0013| File "/usr/local/autotest/results/224775161-chromeos-test/hostless/control.srv", line 92, in <module> 08/07 09:27:57.597 ERROR| traceback:0013| dynamic_suite.reimage_and_run(**args_dict) 08/07 09:27:57.598 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/dynamic_suite.py", line 513, in reimage_and_run 08/07 09:27:57.598 ERROR| traceback:0013| _perform_reimage_and_run(suite_spec, afe, tko, suite_job_id=my_job_id) 08/07 09:27:57.598 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/dynamic_suite.py", line 557, in _perform_reimage_and_run 08/07 09:27:57.598 ERROR| traceback:0013| child_dependencies=spec.child_dependencies, 08/07 09:27:57.598 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/suite.py", line 1384, in create_from_predicates 08/07 09:27:57.599 ERROR| traceback:0013| name, builds, board, cf_getter, run_prod_code, **dargs) 08/07 09:27:57.599 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/suite.py", line 1508, in __init__ 08/07 09:27:57.599 ERROR| traceback:0013| test_args=test_args, 08/07 09:27:57.599 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/suite.py", line 774, in find_and_parse_tests 08/07 09:27:57.599 ERROR| 
traceback:0013| tests = retriever.retrieve_for_suite(suite_name) 08/07 09:27:57.600 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/suite.py", line 546, in retrieve_for_suite 08/07 09:27:57.600 ERROR| traceback:0013| self._test_args) 08/07 09:27:57.600 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/suite_common.py", line 330, in retrieve_for_suite 08/07 09:27:57.600 ERROR| traceback:0013| test_args=test_args) 08/07 09:27:57.600 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/suite_common.py", line 278, in parse_cf_text_many 08/07 09:27:57.600 ERROR| traceback:0013| for path, text in control_file_texts: 08/07 09:27:57.601 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/suite_common.py", line 208, in _get_cf_texts_for_suite_batched 08/07 09:27:57.601 ERROR| traceback:0013| suite_info = cf_getter.get_suite_info(suite_name=suite_name) 08/07 09:27:57.601 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/control_file_getter.py", line 342, in get_suite_info 08/07 09:27:57.601 ERROR| traceback:0013| file_contents = self._list_suite_controls(suite_name=suite_name) 08/07 09:27:57.601 ERROR| traceback:0013| File "/usr/local/autotest/server/cros/dynamic_suite/control_file_getter.py", line 328, in _list_suite_controls 08/07 09:27:57.602 ERROR| traceback:0013| return cache_client.list_suite_controls(self._build, suite_name) 08/07 09:27:57.602 ERROR| traceback:0013| File "/usr/local/autotest/client/common_lib/cros/gs_cache_client.py", line 180, in list_suite_controls 08/07 09:27:57.602 ERROR| traceback:0013| return self._list_suite_controls(build, suite_name) 08/07 09:27:57.602 ERROR| traceback:0013| File "/usr/local/autotest/client/common_lib/cros/gs_cache_client.py", line 198, in _list_suite_controls 08/07 09:27:57.602 ERROR| traceback:0013| test_suites, map_file_name) 08/07 09:27:57.602 ERROR| traceback:0013| File 
"/usr/local/autotest/client/common_lib/cros/gs_cache_client.py", line 136, in extract 08/07 09:27:57.603 ERROR| traceback:0013| rsp_content = self._call('extract', bucket, archive, {'file': files}) 08/07 09:27:57.603 ERROR| traceback:0013| File "/usr/local/autotest/client/common_lib/cros/retry.py", line 246, in func_retry 08/07 09:27:57.603 ERROR| traceback:0013| raise error.TimeoutException(exception_message) 08/07 09:27:57.603 ERROR| traceback:0013| TimeoutException: retry exception (function="_call()"), timeout = 180s 08/07 09:27:57.612 INFO | client:0570| Attempting refresh to obtain initial access_token 08/07 09:27:57.671 INFO | client:0872| Refreshing access_token 08/07 09:27:58.145 ERROR| autoserv:0810| Uncaught SystemExit with code 1