New issue
Advanced search Search tips

Issue 697502 link

Starred by 1 user

Issue metadata

Status: Fixed
Owner:
Closed: Mar 2017
Cc:
Components:
EstimatedDays: ----
NextAction: ----
OS: ----
Pri: 3
Type: Bug



Sign in to add a comment

Several tests failing on the ToT bots

Project Member Reported by h...@chromium.org, Mar 1 2017

Issue description

Linux:
https://build.chromium.org/p/chromium.fyi/builders/ClangToTLinux%20tester/builds/2527
 browser_tests failed cc_unittests failed gfx_unittests failed interactive_ui_tests failed unit_tests

Win:
https://build.chromium.org/p/chromium.fyi/builders/ClangToTWin%20tester/builds/7468
 browser_tests failed content_browsertests failed unit_tests


The Windows range looks pretty narrow.
Last good Clang: r296571
First bad:       r296583


Suspects:

------------------------------------------------------------------------
r296575 | ashahid | 2017-02-28 19:51:54 -0800 (Tue, 28 Feb 2017) | 10 lines

[SLP] Fixes the bug due to absence of in order uses of scalars which needs to be available
for VectorizeTree() API.This API uses it for proper mask computation to be used in shufflevector IR.
The fix is to compute the mask for out of order memory accesses while building the vectorizable tree
instead of actual vectorization of vectorizable tree.

Reviewers: mkuper

Differential Revision: https://reviews.llvm.org/D30159

Change-Id: Id1e287f073fa4959713ba545fa4254db5da8b40d
------------------------------------------------------------------------

Actually, that's the only suspect.
 

Comment 1 by h...@chromium.org, Mar 1 2017

Components: Build
Labels: clang

Comment 2 by h...@chromium.org, Mar 1 2017

A fairly small repro is gfx_unittests:

$ out/release/gfx_unittests --gtest_filter=Matrix3fTest.Inverse
IMPORTANT DEBUGGING NOTE: batches of tests are run inside their
own process. For debugging a test inside a debugger, use the
--gtest_filter=<your_test_name> flag along with
--single-process-tests.
Using sharding settings from environment. This is shard 0/1
Using 1 parallel jobs.
Note: Google Test filter = Matrix3fTest.Inverse
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from Matrix3fTest
[ RUN      ] Matrix3fTest.Inverse
../../ui/gfx/geometry/matrix3_unittest.cc:76: Failure
Value of: singular.Inverse()
  Actual: 36-byte object <69-33 EC-BD 00-00 00-80 1D-2C 21-3E D4-7F 90-3C 74-13 C8-3D D4-7F 10-BD 97-0E 96-3E 1D-2C A1-BD 81-4E DE-BC>
Expected: Matrix3F::Zeros()
Which is: 36-byte object <00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00 00-00>
../../ui/gfx/geometry/matrix3_unittest.cc:86: Failure
Value of: regular.IsNear(inv_regular, 0.00001f)
  Actual: false
Expected: true
[  FAILED  ] Matrix3fTest.Inverse (0 ms)
[----------] 1 test from Matrix3fTest (0 ms total)

[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (0 ms total)
[  PASSED  ] 0 tests.
[  FAILED  ] 1 test, listed below:
[  FAILED  ] Matrix3fTest.Inverse

 1 FAILED TEST
[1/1] Matrix3fTest.Inverse (0 ms)
1 test failed:
    Matrix3fTest.Inverse (../../ui/gfx/geometry/matrix3_unittest.cc:66)
Tests took 0 seconds.


That test is mostly exercising ui/gfx/geometry/matrix3_f.cc

Attaching a preprocessed version of that; it can be built with:

clang++ -fno-strict-aliasing --param=ssp-buffer-size=4 -fstack-protector -funwind-tables -fPIC -pipe -m64 -march=x86-64 -pthread -O2 -fvisibility=hidden -fvisibility-inlines-hidden -std=gnu++11 -fno-rtti -fno-exceptions -w -c /tmp/matrix3_f.ii -o /tmp/matrix3_f.o


Diffing the asm before r296575 and after shows changes in _ZNK3gfx8Matrix3F7InverseEv (source here: https://cs.chromium.org/chromium/src/ui/gfx/geometry/matrix3_f.cc?dr&l=101)

--- /tmp/matrix3_f.good.s       2017-03-01 10:42:02.875397777 -0800
+++ /tmp/matrix3_f.bad.s        2017-03-01 10:42:11.071318686 -0800
@@ -239,14 +239,14 @@
        movups  16(%rsi), %xmm12
        movss   32(%rsi), %xmm3         # xmm3 = mem[0],zero,zero,zero
        movaps  %xmm12, %xmm7
-       shufps  $229, %xmm7, %xmm7      # xmm7 = xmm7[1,1,2,3]
+       shufps  $231, %xmm7, %xmm7      # xmm7 = xmm7[3,1,2,3]
        cvtss2sd        %xmm7, %xmm5
        movaps  %xmm12, %xmm4
-       shufps  $227, %xmm4, %xmm4      # xmm4 = xmm4[3,0,2,3]
+       shufps  $226, %xmm4, %xmm4      # xmm4 = xmm4[2,0,2,3]
        cvtps2pd        %xmm4, %xmm1
        cvtss2sd        %xmm10, %xmm9
        movaps  %xmm12, %xmm4
-       movhlps %xmm4, %xmm4            # xmm4 = xmm4[1,1]
+       shufps  $229, %xmm4, %xmm4      # xmm4 = xmm4[1,1,2,3]
        xorps   %xmm0, %xmm0
        cvtss2sd        %xmm4, %xmm0
        movapd  %xmm5, %xmm4
@@ -281,7 +281,7 @@
        ja      .LBB8_2
 # BB#1:                                 # %if.end
        movaps  %xmm12, %xmm4
-       shufps  $231, %xmm4, %xmm4      # xmm4 = xmm4[3,1,2,3]
+       movhlps %xmm4, %xmm4            # xmm4 = xmm4[1,1]
        movaps  %xmm12, %xmm0
        shufps  $156, %xmm0, %xmm0      # xmm0 = xmm0[0,3,1,2]
        movaps  %xmm3, %xmm1
@@ -290,7 +290,7 @@
        shufps  $48, %xmm1, %xmm2       # xmm2 = xmm2[0,0],xmm1[3,0]
        shufps  $132, %xmm2, %xmm1      # xmm1 = xmm1[0,1],xmm2[0,2]
        movaps  %xmm12, %xmm2
-       shufps  $33, %xmm1, %xmm2       # xmm2 = xmm2[1,0],xmm1[2,0]
+       shufps  $35, %xmm1, %xmm2       # xmm2 = xmm2[3,0],xmm1[2,0]
        shufps  $36, %xmm2, %xmm1       # xmm1 = xmm1[0,1],xmm2[2,0]
        mulps   %xmm0, %xmm1
        movaps  %xmm10, %xmm0
@@ -318,7 +318,7 @@
        unpcklps        %xmm8, %xmm0    # xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1]
        unpcklps        %xmm13, %xmm0   # xmm0 = xmm0[0],xmm13[0],xmm0[1],xmm13[1]
        movaps  %xmm12, %xmm1
-       shufps  $34, %xmm0, %xmm1       # xmm1 = xmm1[2,0],xmm0[2,0]
+       shufps  $33, %xmm0, %xmm1       # xmm1 = xmm1[1,0],xmm0[2,0]
        shufps  $36, %xmm1, %xmm0       # xmm0 = xmm0[0,1],xmm1[2,0]
        unpcklps        %xmm8, %xmm3    # xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1]
        shufps  $48, %xmm3, %xmm4       # xmm4 = xmm4[0,0],xmm3[3,0]
@@ -332,7 +332,7 @@
        unpcklps        %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1]
        unpcklps        %xmm0, %xmm13   # xmm13 = xmm13[0],xmm0[0],xmm13[1],xmm0[1]
        movaps  %xmm12, %xmm0
-       shufps  $230, %xmm0, %xmm0      # xmm0 = xmm0[2,1,2,3]
+       shufps  $157, %xmm0, %xmm0      # xmm0 = xmm0[1,3,1,2]
        mulps   %xmm13, %xmm0
        subps   %xmm0, %xmm3
        cvtps2pd        %xmm3, %xmm0
@@ -923,5 +923,5 @@
        .globl  _ZN3gfx8Matrix3FD1Ev
        .type   _ZN3gfx8Matrix3FD1Ev,@function
 _ZN3gfx8Matrix3FD1Ev = _ZN3gfx8Matrix3FD2Ev
-       .ident  "clang version 5.0.0 (trunk 296632) (llvm/trunk 296642)"
+       .ident  "clang version 5.0.0 (trunk 296632) (llvm/trunk 296634)"
        .section        ".note.GNU-stack","",@progbits

Comment 3 by h...@chromium.org, Mar 1 2017

Reverted in LLVM r296654

Comment 4 by h...@chromium.org, Mar 1 2017

Oops, actually attaching the preprocessed source this time.
matrix3_f.ii
1.2 MB Download

Comment 5 by h...@chromium.org, Mar 1 2017

Reduced test case:

extern "C" int memcmp(__const void *, __const void *, long);
class Matrix3F {
public:
  ~Matrix3F();
  bool m_fn1(const Matrix3F &) const;
  void m_fn2(float p1, float p2, float p3, float p4, float p5, float p6,
             float p7, float p8) {
    data_[0] = p1;
    data_[1] = p2;
    data_[2] = p3;
    data_[4] = p4;
    data_[5] = p5;
    data_[6] = p6;
    data_[7] = p7;
    data_[8] = p8;
  }
  Matrix3F m_fn3() const;
  Matrix3F();
  float data_[9];
};

enum { M00, M01, M02, M10, M11, M12, M20, M21, M22 };
Matrix3F g;
Matrix3F::Matrix3F() {}

Matrix3F::~Matrix3F() {}

bool Matrix3F::m_fn1(const Matrix3F &p1) const {
  return 0 == memcmp(data_, p1.data_, sizeof data_);
}

Matrix3F Matrix3F::m_fn3() const {
  double a, b;
  Matrix3F c, d;
  d.m_fn2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
  c = d;
  Matrix3F e = c;
  const float *f = data_;
  b = f[M00] * f[M11] * f[M22] - f[M12] * f[M21] + f[M01] * f[M12] * f[M20] +
      f[M02] * (f[M10] - static_cast<double>(f[M11]) * f[M20]);
  a = __builtin_fabs(b);
  if (1.19209290e-7F > a)
    return e;
  e.m_fn2(b, b, data_[M01] / b, (data_[M00] * data_[M22] - data_[M20]) / b,
          (data_[M02] * data_[M10] - data_[M12]) / b,
          (data_[M10] * data_[M21] - data_[M20]) / b,
          (data_[M01] * data_[M20] - data_[M21]) / b, b);
  return e;
}

int main() {
  Matrix3F h, i, j;
  g.m_fn2(1.0f, 3.0f, 4.0f, 11.0f, 5.0f, 0.5f, 1.5f, 2.0f);
  i = g.m_fn3();
  j.m_fn2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
  h = j;
  if (h.m_fn1(i))
    return 0;
  return 1;
}

$ bin/clang.good -O2 a.ii && ./a.out && echo ok
ok
$ bin/clang.bad -O2 a.ii && ./a.out && echo ok

Comment 6 by h...@chromium.org, Mar 20 2017

Status: Fixed (was: Started)
This was fixed a few weeks ago.

It was filed upstream in llvm.org/PR32109 and fixed by a revert in r296654.

Sign in to add a comment